Diffstat (limited to 'contrib/llvm/lib/CodeGen')
-rw-r--r--  contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp | 164
-rw-r--r--  contrib/llvm/lib/CodeGen/AllocationOrder.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/Analysis.cpp | 56
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp | 5
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp | 721
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 234
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 34
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h | 18
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 3
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h | 24
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 477
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h | 76
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp | 132
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp | 24
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h | 38
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp | 54
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp | 30
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h | 8
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h | 17
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h | 8
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp | 293
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h | 261
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 189
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 31
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 665
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h | 121
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 29
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h | 3
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp | 44
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h | 54
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp | 29
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h | 9
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 231
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h | 40
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp | 152
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h | 12
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp | 12
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 10
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp | 45
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h | 54
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp | 12
-rw-r--r--  contrib/llvm/lib/CodeGen/BranchFolding.cpp | 264
-rw-r--r--  contrib/llvm/lib/CodeGen/BranchFolding.h | 10
-rw-r--r--  contrib/llvm/lib/CodeGen/BranchRelaxation.cpp | 184
-rw-r--r--  contrib/llvm/lib/CodeGen/BreakFalseDeps.cpp | 271
-rw-r--r--  contrib/llvm/lib/CodeGen/CFIInstrInserter.cpp | 326
-rw-r--r--  contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/CodeGen.cpp | 13
-rw-r--r--  contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp | 885
-rw-r--r--  contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp | 30
-rw-r--r--  contrib/llvm/lib/CodeGen/DFAPacketizer.cpp | 28
-rw-r--r--  contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp | 35
-rw-r--r--  contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp | 99
-rw-r--r--  contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp | 755
-rw-r--r--  contrib/llvm/lib/CodeGen/ExecutionDomainFix.cpp | 473
-rw-r--r--  contrib/llvm/lib/CodeGen/ExpandMemCmp.cpp | 35
-rw-r--r--  contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp | 31
-rw-r--r--  contrib/llvm/lib/CodeGen/ExpandReductions.cpp | 15
-rw-r--r--  contrib/llvm/lib/CodeGen/FaultMaps.cpp | 22
-rw-r--r--  contrib/llvm/lib/CodeGen/FuncletLayout.cpp | 5
-rw-r--r--  contrib/llvm/lib/CodeGen/GCRootLowering.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 3
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/Combiner.cpp | 81
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 41
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 606
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp | 106
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp | 44
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp | 101
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp | 51
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp | 11
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 643
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp | 245
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/Localizer.cpp | 23
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 638
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp | 67
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp | 1
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp | 41
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp | 99
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalMerge.cpp | 75
-rw-r--r--  contrib/llvm/lib/CodeGen/IfConversion.cpp | 75
-rw-r--r--  contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp | 39
-rw-r--r--  contrib/llvm/lib/CodeGen/InlineSpiller.cpp | 90
-rw-r--r--  contrib/llvm/lib/CodeGen/InterferenceCache.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp | 18
-rw-r--r--  contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp | 1
-rw-r--r--  contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp | 38
-rw-r--r--  contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp | 12
-rw-r--r--  contrib/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp | 12
-rw-r--r--  contrib/llvm/lib/CodeGen/LexicalScopes.cpp | 1
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveDebugValues.cpp | 261
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp | 93
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveInterval.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveIntervals.cpp | 143
-rw-r--r--  contrib/llvm/lib/CodeGen/LivePhysRegs.cpp | 15
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp | 21
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRangeCalc.h | 11
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp | 44
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRangeShrink.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp | 52
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRegUnits.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveVariables.cpp | 5
-rw-r--r--  contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp | 42
-rw-r--r--  contrib/llvm/lib/CodeGen/LoopTraversal.cpp | 77
-rw-r--r--  contrib/llvm/lib/CodeGen/LowerEmuTLS.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp | 407
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp | 65
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRParser/MILexer.h | 15
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp | 150
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRParser/MIParser.h | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 34
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRPrinter.cpp | 220
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp | 231
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp | 247
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineCSE.cpp | 47
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineCombiner.cpp | 187
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp | 227
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineDominators.cpp | 35
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineFrameInfo.cpp | 9
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineFunction.cpp | 19
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineInstr.cpp | 335
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineLICM.cpp | 272
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp | 1
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineOperand.cpp | 335
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp | 3
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineOutliner.cpp | 600
-rw-r--r--  contrib/llvm/lib/CodeGen/MachinePipeliner.cpp | 689
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp | 3
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp | 84
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineScheduler.cpp | 452
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineSink.cpp | 376
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp | 58
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineVerifier.cpp | 179
-rw-r--r--  contrib/llvm/lib/CodeGen/MacroFusion.cpp | 36
-rw-r--r--  contrib/llvm/lib/CodeGen/OptimizePHIs.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/PHIElimination.cpp | 31
-rw-r--r--  contrib/llvm/lib/CodeGen/ParallelCG.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/PatchableFunction.cpp | 1
-rw-r--r--  contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp | 81
-rw-r--r--  contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp | 45
-rw-r--r--  contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp | 18
-rw-r--r--  contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp | 242
-rw-r--r--  contrib/llvm/lib/CodeGen/ReachingDefAnalysis.cpp | 195
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocBase.cpp | 12
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocBasic.cpp | 13
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocFast.cpp | 110
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp | 336
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp | 75
-rw-r--r--  contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp | 124
-rw-r--r--  contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp | 95
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp | 11
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp | 424
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterPressure.cpp | 11
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterScavenging.cpp | 33
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp | 30
-rw-r--r--  contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp | 36
-rw-r--r--  contrib/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp | 18
-rw-r--r--  contrib/llvm/lib/CodeGen/SafeStack.cpp | 118
-rw-r--r--  contrib/llvm/lib/CodeGen/SafeStackColoring.cpp | 17
-rw-r--r--  contrib/llvm/lib/CodeGen/SafeStackLayout.cpp | 29
-rw-r--r--  contrib/llvm/lib/CodeGen/SafeStackLayout.h | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp | 3
-rw-r--r--  contrib/llvm/lib/CodeGen/ScheduleDAG.cpp | 1
-rw-r--r--  contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp | 78
-rw-r--r--  contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp | 17
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 3372
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 233
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 80
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 74
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h | 3
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 337
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 56
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 139
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 268
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 125
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 5
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 207
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 188
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h | 47
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 51
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 193
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 36
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp | 12
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 837
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp | 43
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 911
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 46
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 80
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 632
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 918
-rw-r--r--  contrib/llvm/lib/CodeGen/ShrinkWrap.cpp | 171
-rw-r--r--  contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp | 35
-rw-r--r--  contrib/llvm/lib/CodeGen/SlotIndexes.cpp | 15
-rw-r--r--  contrib/llvm/lib/CodeGen/SpillPlacement.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/SplitKit.cpp | 173
-rw-r--r--  contrib/llvm/lib/CodeGen/SplitKit.h | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/StackColoring.cpp | 115
-rw-r--r--  contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp | 24
-rw-r--r--  contrib/llvm/lib/CodeGen/StackMaps.cpp | 32
-rw-r--r--  contrib/llvm/lib/CodeGen/StackProtector.cpp | 79
-rw-r--r--  contrib/llvm/lib/CodeGen/StackSlotColoring.cpp | 116
-rw-r--r--  contrib/llvm/lib/CodeGen/TailDuplication.cpp | 59
-rw-r--r--  contrib/llvm/lib/CodeGen/TailDuplicator.cpp | 38
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp | 31
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp | 14
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp | 65
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 268
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetPassConfig.cpp | 104
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp | 73
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetSchedule.cpp | 120
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetSubtargetInfo.cpp | 25
-rw-r--r--  contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp | 86
-rw-r--r--  contrib/llvm/lib/CodeGen/ValueTypes.cpp | 321
-rw-r--r--  contrib/llvm/lib/CodeGen/VirtRegMap.cpp | 73
-rw-r--r--  contrib/llvm/lib/CodeGen/WasmEHPrepare.cpp | 374
-rw-r--r--  contrib/llvm/lib/CodeGen/WinEHPrepare.cpp | 63
-rw-r--r--  contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp | 20
233 files changed, 20206 insertions, 10619 deletions
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index ffcb9a09ad73..632ea8e9cdc4 100644
--- a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -25,7 +25,6 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -35,6 +34,7 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <map>
@@ -139,10 +139,11 @@ AggressiveAntiDepBreaker::AggressiveAntiDepBreaker(
CriticalPathSet |= CPSet;
}
- DEBUG(dbgs() << "AntiDep Critical-Path Registers:");
- DEBUG(for (unsigned r : CriticalPathSet.set_bits())
- dbgs() << " " << printReg(r, TRI));
- DEBUG(dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << "AntiDep Critical-Path Registers:");
+ LLVM_DEBUG(for (unsigned r
+ : CriticalPathSet.set_bits()) dbgs()
+ << " " << printReg(r, TRI));
+ LLVM_DEBUG(dbgs() << '\n');
}
AggressiveAntiDepBreaker::~AggressiveAntiDepBreaker() {
@@ -202,9 +203,9 @@ void AggressiveAntiDepBreaker::Observe(MachineInstr &MI, unsigned Count,
PrescanInstruction(MI, Count, PassthruRegs);
ScanInstruction(MI, Count);
- DEBUG(dbgs() << "Observe: ");
- DEBUG(MI.dump());
- DEBUG(dbgs() << "\tRegs:");
+ LLVM_DEBUG(dbgs() << "Observe: ");
+ LLVM_DEBUG(MI.dump());
+ LLVM_DEBUG(dbgs() << "\tRegs:");
std::vector<unsigned> &DefIndices = State->GetDefIndices();
for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) {
@@ -215,16 +216,16 @@ void AggressiveAntiDepBreaker::Observe(MachineInstr &MI, unsigned Count,
// conservative location (i.e. the beginning of the previous
// schedule region).
if (State->IsLive(Reg)) {
- DEBUG(if (State->GetGroup(Reg) != 0)
- dbgs() << " " << printReg(Reg, TRI) << "=g" <<
- State->GetGroup(Reg) << "->g0(region live-out)");
+ LLVM_DEBUG(if (State->GetGroup(Reg) != 0) dbgs()
+ << " " << printReg(Reg, TRI) << "=g" << State->GetGroup(Reg)
+ << "->g0(region live-out)");
State->UnionGroups(Reg, 0);
} else if ((DefIndices[Reg] < InsertPosIndex)
&& (DefIndices[Reg] >= Count)) {
DefIndices[Reg] = Count;
}
}
- DEBUG(dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << '\n');
}
bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr &MI,
@@ -313,7 +314,7 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
// subregister definitions).
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
if (TRI->isSuperRegister(Reg, *AI) && State->IsLive(*AI)) {
- DEBUG(if (!header && footer) dbgs() << footer);
+ LLVM_DEBUG(if (!header && footer) dbgs() << footer);
return;
}
@@ -322,9 +323,11 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
DefIndices[Reg] = ~0u;
RegRefs.erase(Reg);
State->LeaveGroup(Reg);
- DEBUG(if (header) {
- dbgs() << header << printReg(Reg, TRI); header = nullptr; });
- DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag);
+ LLVM_DEBUG(if (header) {
+ dbgs() << header << printReg(Reg, TRI);
+ header = nullptr;
+ });
+ LLVM_DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag);
// Repeat for subregisters. Note that we only do this if the superregister
// was not live because otherwise, regardless whether we have an explicit
// use of the subregister, the subregister's contents are needed for the
@@ -336,15 +339,17 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
DefIndices[SubregReg] = ~0u;
RegRefs.erase(SubregReg);
State->LeaveGroup(SubregReg);
- DEBUG(if (header) {
- dbgs() << header << printReg(Reg, TRI); header = nullptr; });
- DEBUG(dbgs() << " " << printReg(SubregReg, TRI) << "->g" <<
- State->GetGroup(SubregReg) << tag);
+ LLVM_DEBUG(if (header) {
+ dbgs() << header << printReg(Reg, TRI);
+ header = nullptr;
+ });
+ LLVM_DEBUG(dbgs() << " " << printReg(SubregReg, TRI) << "->g"
+ << State->GetGroup(SubregReg) << tag);
}
}
}
- DEBUG(if (!header && footer) dbgs() << footer);
+ LLVM_DEBUG(if (!header && footer) dbgs() << footer);
}
void AggressiveAntiDepBreaker::PrescanInstruction(
@@ -367,14 +372,15 @@ void AggressiveAntiDepBreaker::PrescanInstruction(
HandleLastUse(Reg, Count + 1, "", "\tDead Def: ", "\n");
}
- DEBUG(dbgs() << "\tDef Groups:");
+ LLVM_DEBUG(dbgs() << "\tDef Groups:");
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.isDef()) continue;
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
- DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g" << State->GetGroup(Reg));
+ LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g"
+ << State->GetGroup(Reg));
// If MI's defs have a special allocation requirement, don't allow
// any def registers to be changed. Also assume all registers
@@ -383,7 +389,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(
// can tell user specified registers from compiler-specified.
if (MI.isCall() || MI.hasExtraDefRegAllocReq() || TII->isPredicated(MI) ||
MI.isInlineAsm()) {
- DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");
+ LLVM_DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");
State->UnionGroups(Reg, 0);
}
@@ -393,8 +399,8 @@ void AggressiveAntiDepBreaker::PrescanInstruction(
unsigned AliasReg = *AI;
if (State->IsLive(AliasReg)) {
State->UnionGroups(Reg, AliasReg);
- DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via "
- << printReg(AliasReg, TRI) << ")");
+ LLVM_DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via "
+ << printReg(AliasReg, TRI) << ")");
}
}
@@ -406,7 +412,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(
RegRefs.insert(std::make_pair(Reg, RR));
}
- DEBUG(dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << '\n');
// Scan the register defs for this instruction and update
// live-ranges.
@@ -437,7 +443,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(
void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI,
unsigned Count) {
- DEBUG(dbgs() << "\tUse Groups:");
+ LLVM_DEBUG(dbgs() << "\tUse Groups:");
std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
RegRefs = State->GetRegRefs();
@@ -448,11 +454,11 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI,
// FIXME: The issue with predicated instruction is more complex. We are being
// conservatively here because the kill markers cannot be trusted after
// if-conversion:
- // %r6 = LDR %sp, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
+ // %r6 = LDR %sp, %reg0, 92, 14, %reg0; mem:LD4[FixedStack14]
// ...
- // STR %r0, killed %r6, %reg0, 0, pred:0, pred:%cpsr; mem:ST4[%395]
- // %r6 = LDR %sp, %reg0, 100, pred:0, pred:%cpsr; mem:LD4[FixedStack12]
- // STR %r0, killed %r6, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
+ // STR %r0, killed %r6, %reg0, 0, 0, %cpsr; mem:ST4[%395]
+ // %r6 = LDR %sp, %reg0, 100, 0, %cpsr; mem:LD4[FixedStack12]
+ // STR %r0, killed %r6, %reg0, 0, 14, %reg0; mem:ST4[%396](align=8)
//
// The first R6 kill is not really a kill since it's killed by a predicated
// instruction which may not be executed. The second R6 def may or may not
@@ -469,7 +475,8 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI,
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
- DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g" << State->GetGroup(Reg));
+ LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g"
+ << State->GetGroup(Reg));
// It wasn't previously live but now it is, this is a kill. Forget
// the previous live-range information and start a new live-range
@@ -477,7 +484,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI,
HandleLastUse(Reg, Count, "(last-use)");
if (Special) {
- DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");
+ LLVM_DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");
State->UnionGroups(Reg, 0);
}
@@ -489,12 +496,12 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI,
RegRefs.insert(std::make_pair(Reg, RR));
}
- DEBUG(dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << '\n');
// Form a group of all defs and uses of a KILL instruction to ensure
// that all registers are renamed as a group.
if (MI.isKill()) {
- DEBUG(dbgs() << "\tKill Group:");
+ LLVM_DEBUG(dbgs() << "\tKill Group:");
unsigned FirstReg = 0;
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
@@ -504,15 +511,15 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI,
if (Reg == 0) continue;
if (FirstReg != 0) {
- DEBUG(dbgs() << "=" << printReg(Reg, TRI));
+ LLVM_DEBUG(dbgs() << "=" << printReg(Reg, TRI));
State->UnionGroups(FirstReg, Reg);
} else {
- DEBUG(dbgs() << " " << printReg(Reg, TRI));
+ LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI));
FirstReg = Reg;
}
}
- DEBUG(dbgs() << "->g" << State->GetGroup(FirstReg) << '\n');
+ LLVM_DEBUG(dbgs() << "->g" << State->GetGroup(FirstReg) << '\n');
}
}
@@ -535,7 +542,7 @@ BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) {
BV &= RCBV;
}
- DEBUG(dbgs() << " " << TRI->getRegClassName(RC));
+ LLVM_DEBUG(dbgs() << " " << TRI->getRegClassName(RC));
}
return BV;
@@ -562,8 +569,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
// Find the "superest" register in the group. At the same time,
// collect the BitVector of registers that can be used to rename
// each register.
- DEBUG(dbgs() << "\tRename Candidates for Group g" << AntiDepGroupIndex
- << ":\n");
+ LLVM_DEBUG(dbgs() << "\tRename Candidates for Group g" << AntiDepGroupIndex
+ << ":\n");
std::map<unsigned, BitVector> RenameRegisterMap;
unsigned SuperReg = 0;
for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
@@ -573,13 +580,13 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
// If Reg has any references, then collect possible rename regs
if (RegRefs.count(Reg) > 0) {
- DEBUG(dbgs() << "\t\t" << printReg(Reg, TRI) << ":");
+ LLVM_DEBUG(dbgs() << "\t\t" << printReg(Reg, TRI) << ":");
BitVector &BV = RenameRegisterMap[Reg];
assert(BV.empty());
BV = GetRenameRegisters(Reg);
- DEBUG({
+ LLVM_DEBUG({
dbgs() << " ::";
for (unsigned r : BV.set_bits())
dbgs() << " " << printReg(r, TRI);
@@ -625,11 +632,11 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(SuperRC);
if (Order.empty()) {
- DEBUG(dbgs() << "\tEmpty Super Regclass!!\n");
+ LLVM_DEBUG(dbgs() << "\tEmpty Super Regclass!!\n");
return false;
}
- DEBUG(dbgs() << "\tFind Registers:");
+ LLVM_DEBUG(dbgs() << "\tFind Registers:");
RenameOrder.insert(RenameOrderType::value_type(SuperRC, Order.size()));
@@ -645,7 +652,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
// Don't replace a register with itself.
if (NewSuperReg == SuperReg) continue;
- DEBUG(dbgs() << " [" << printReg(NewSuperReg, TRI) << ':');
+ LLVM_DEBUG(dbgs() << " [" << printReg(NewSuperReg, TRI) << ':');
RenameMap.clear();
// For each referenced group register (which must be a SuperReg or
@@ -662,11 +669,11 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
NewReg = TRI->getSubReg(NewSuperReg, NewSubRegIdx);
}
- DEBUG(dbgs() << " " << printReg(NewReg, TRI));
+ LLVM_DEBUG(dbgs() << " " << printReg(NewReg, TRI));
// Check if Reg can be renamed to NewReg.
if (!RenameRegisterMap[Reg].test(NewReg)) {
- DEBUG(dbgs() << "(no rename)");
+ LLVM_DEBUG(dbgs() << "(no rename)");
goto next_super_reg;
}
@@ -675,7 +682,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
// must also check all aliases of NewReg, because we can't define a
// register when any sub or super is already live.
if (State->IsLive(NewReg) || (KillIndices[Reg] > DefIndices[NewReg])) {
- DEBUG(dbgs() << "(live)");
+ LLVM_DEBUG(dbgs() << "(live)");
goto next_super_reg;
} else {
bool found = false;
@@ -683,7 +690,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
unsigned AliasReg = *AI;
if (State->IsLive(AliasReg) ||
(KillIndices[Reg] > DefIndices[AliasReg])) {
- DEBUG(dbgs() << "(alias " << printReg(AliasReg, TRI) << " live)");
+ LLVM_DEBUG(dbgs()
+ << "(alias " << printReg(AliasReg, TRI) << " live)");
found = true;
break;
}
@@ -701,7 +709,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
continue;
if (UseMI->getOperand(Idx).isEarlyClobber()) {
- DEBUG(dbgs() << "(ec)");
+ LLVM_DEBUG(dbgs() << "(ec)");
goto next_super_reg;
}
}
@@ -715,7 +723,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
MachineInstr *DefMI = Q.second.Operand->getParent();
if (DefMI->readsRegister(NewReg, TRI)) {
- DEBUG(dbgs() << "(ec)");
+ LLVM_DEBUG(dbgs() << "(ec)");
goto next_super_reg;
}
}
@@ -728,14 +736,14 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
// renamed, as recorded in RenameMap.
RenameOrder.erase(SuperRC);
RenameOrder.insert(RenameOrderType::value_type(SuperRC, R));
- DEBUG(dbgs() << "]\n");
+ LLVM_DEBUG(dbgs() << "]\n");
return true;
next_super_reg:
- DEBUG(dbgs() << ']');
+ LLVM_DEBUG(dbgs() << ']');
} while (R != EndR);
- DEBUG(dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << '\n');
// No registers are free and available!
return false;
@@ -788,13 +796,13 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
}
#ifndef NDEBUG
- DEBUG(dbgs() << "\n===== Aggressive anti-dependency breaking\n");
- DEBUG(dbgs() << "Available regs:");
+ LLVM_DEBUG(dbgs() << "\n===== Aggressive anti-dependency breaking\n");
+ LLVM_DEBUG(dbgs() << "Available regs:");
for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
if (!State->IsLive(Reg))
- DEBUG(dbgs() << " " << printReg(Reg, TRI));
+ LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI));
}
- DEBUG(dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << '\n');
#endif
BitVector RegAliases(TRI->getNumRegs());
@@ -808,11 +816,11 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
I != E; --Count) {
MachineInstr &MI = *--I;
- if (MI.isDebugValue())
+ if (MI.isDebugInstr())
continue;
- DEBUG(dbgs() << "Anti: ");
- DEBUG(MI.dump());
+ LLVM_DEBUG(dbgs() << "Anti: ");
+ LLVM_DEBUG(MI.dump());
std::set<unsigned> PassthruRegs;
GetPassthruRegs(MI, PassthruRegs);
@@ -848,30 +856,30 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
(Edge->getKind() != SDep::Output)) continue;
unsigned AntiDepReg = Edge->getReg();
- DEBUG(dbgs() << "\tAntidep reg: " << printReg(AntiDepReg, TRI));
+ LLVM_DEBUG(dbgs() << "\tAntidep reg: " << printReg(AntiDepReg, TRI));
assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
if (!MRI.isAllocatable(AntiDepReg)) {
// Don't break anti-dependencies on non-allocatable registers.
- DEBUG(dbgs() << " (non-allocatable)\n");
+ LLVM_DEBUG(dbgs() << " (non-allocatable)\n");
continue;
} else if (ExcludeRegs && ExcludeRegs->test(AntiDepReg)) {
// Don't break anti-dependencies for critical path registers
// if not on the critical path
- DEBUG(dbgs() << " (not critical-path)\n");
+ LLVM_DEBUG(dbgs() << " (not critical-path)\n");
continue;
} else if (PassthruRegs.count(AntiDepReg) != 0) {
// If the anti-dep register liveness "passes-thru", then
// don't try to change it. It will be changed along with
// the use if required to break an earlier antidep.
- DEBUG(dbgs() << " (passthru)\n");
+ LLVM_DEBUG(dbgs() << " (passthru)\n");
continue;
} else {
// No anti-dep breaking for implicit deps
MachineOperand *AntiDepOp = MI.findRegisterDefOperand(AntiDepReg);
assert(AntiDepOp && "Can't find index for defined register operand");
if (!AntiDepOp || AntiDepOp->isImplicit()) {
- DEBUG(dbgs() << " (implicit)\n");
+ LLVM_DEBUG(dbgs() << " (implicit)\n");
continue;
}
@@ -897,13 +905,13 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
PE = PathSU->Preds.end(); P != PE; ++P) {
if ((P->getSUnit() == NextSU) && (P->getKind() != SDep::Anti) &&
(P->getKind() != SDep::Output)) {
- DEBUG(dbgs() << " (real dependency)\n");
+ LLVM_DEBUG(dbgs() << " (real dependency)\n");
AntiDepReg = 0;
break;
} else if ((P->getSUnit() != NextSU) &&
(P->getKind() == SDep::Data) &&
(P->getReg() == AntiDepReg)) {
- DEBUG(dbgs() << " (other dependency)\n");
+ LLVM_DEBUG(dbgs() << " (other dependency)\n");
AntiDepReg = 0;
break;
}
@@ -941,17 +949,17 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
// Determine AntiDepReg's register group.
const unsigned GroupIndex = State->GetGroup(AntiDepReg);
if (GroupIndex == 0) {
- DEBUG(dbgs() << " (zero group)\n");
+ LLVM_DEBUG(dbgs() << " (zero group)\n");
continue;
}
- DEBUG(dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << '\n');
// Look for a suitable register to use to break the anti-dependence.
std::map<unsigned, unsigned> RenameMap;
if (FindSuitableFreeRegisters(GroupIndex, RenameOrder, RenameMap)) {
- DEBUG(dbgs() << "\tBreaking anti-dependence edge on "
- << printReg(AntiDepReg, TRI) << ":");
+ LLVM_DEBUG(dbgs() << "\tBreaking anti-dependence edge on "
+ << printReg(AntiDepReg, TRI) << ":");
// Handle each group register...
for (std::map<unsigned, unsigned>::iterator
@@ -959,9 +967,9 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
unsigned CurrReg = S->first;
unsigned NewReg = S->second;
- DEBUG(dbgs() << " " << printReg(CurrReg, TRI) << "->"
- << printReg(NewReg, TRI) << "("
- << RegRefs.count(CurrReg) << " refs)");
+ LLVM_DEBUG(dbgs() << " " << printReg(CurrReg, TRI) << "->"
+ << printReg(NewReg, TRI) << "("
+ << RegRefs.count(CurrReg) << " refs)");
// Update the references to the old register CurrReg to
// refer to the new register NewReg.
@@ -994,7 +1002,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
}
++Broken;
- DEBUG(dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << '\n');
}
}
}
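
Most of the churn in AggressiveAntiDepBreaker.cpp, and in many of the files below, is the mechanical rename of the DEBUG(...) logging macro to LLVM_DEBUG(...). As a minimal sketch of how a macro of this shape behaves (an illustration only, not the actual definition in llvm/Support/Debug.h, which additionally filters by DEBUG_TYPE and the -debug-only option), the argument is evaluated only in assert-enabled builds and only when a debug flag is set:

#include <iostream>

// Illustrative stand-ins; the real flag and macro live in llvm/Support/Debug.h.
static bool DebugFlag = false; // would be toggled by a -debug style option

#ifndef NDEBUG
#define SKETCH_DEBUG(X)                                                        \
  do {                                                                         \
    if (DebugFlag) {                                                           \
      X;                                                                       \
    }                                                                          \
  } while (false)
#else
// Release builds compile the logging statement away entirely.
#define SKETCH_DEBUG(X)                                                        \
  do {                                                                         \
  } while (false)
#endif

int main() {
  DebugFlag = true;
  SKETCH_DEBUG(std::cerr << "Anti: breaking anti-dependency\n");
  return 0;
}

The behaviour of the call sites is unchanged by this diff; only the macro name differs.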
diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp
index 8e8c1d8e08d1..37dcb0be824e 100644
--- a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp
+++ b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp
@@ -39,7 +39,7 @@ AllocationOrder::AllocationOrder(unsigned VirtReg,
HardHints = true;
rewind();
- DEBUG({
+ LLVM_DEBUG({
if (!Hints.empty()) {
dbgs() << "hints:";
for (unsigned I = 0, E = Hints.size(); I != E; ++I)
diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp
index 0731ae575437..79f11def38f7 100644
--- a/contrib/llvm/lib/CodeGen/Analysis.cpp
+++ b/contrib/llvm/lib/CodeGen/Analysis.cpp
@@ -629,26 +629,26 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
return true;
}
-static void collectFuncletMembers(
- DenseMap<const MachineBasicBlock *, int> &FuncletMembership, int Funclet,
+static void collectEHScopeMembers(
+ DenseMap<const MachineBasicBlock *, int> &EHScopeMembership, int EHScope,
const MachineBasicBlock *MBB) {
SmallVector<const MachineBasicBlock *, 16> Worklist = {MBB};
while (!Worklist.empty()) {
const MachineBasicBlock *Visiting = Worklist.pop_back_val();
- // Don't follow blocks which start new funclets.
+ // Don't follow blocks which start new scopes.
if (Visiting->isEHPad() && Visiting != MBB)
continue;
- // Add this MBB to our funclet.
- auto P = FuncletMembership.insert(std::make_pair(Visiting, Funclet));
+ // Add this MBB to our scope.
+ auto P = EHScopeMembership.insert(std::make_pair(Visiting, EHScope));
// Don't revisit blocks.
if (!P.second) {
- assert(P.first->second == Funclet && "MBB is part of two funclets!");
+ assert(P.first->second == EHScope && "MBB is part of two scopes!");
continue;
}
- // Returns are boundaries where funclet transfer can occur, don't follow
+ // Returns are boundaries where scope transfer can occur, don't follow
// successors.
if (Visiting->isReturnBlock())
continue;
@@ -659,25 +659,25 @@ static void collectFuncletMembers(
}
DenseMap<const MachineBasicBlock *, int>
-llvm::getFuncletMembership(const MachineFunction &MF) {
- DenseMap<const MachineBasicBlock *, int> FuncletMembership;
+llvm::getEHScopeMembership(const MachineFunction &MF) {
+ DenseMap<const MachineBasicBlock *, int> EHScopeMembership;
// We don't have anything to do if there aren't any EH pads.
- if (!MF.hasEHFunclets())
- return FuncletMembership;
+ if (!MF.hasEHScopes())
+ return EHScopeMembership;
int EntryBBNumber = MF.front().getNumber();
bool IsSEH = isAsynchronousEHPersonality(
classifyEHPersonality(MF.getFunction().getPersonalityFn()));
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
- SmallVector<const MachineBasicBlock *, 16> FuncletBlocks;
+ SmallVector<const MachineBasicBlock *, 16> EHScopeBlocks;
SmallVector<const MachineBasicBlock *, 16> UnreachableBlocks;
SmallVector<const MachineBasicBlock *, 16> SEHCatchPads;
SmallVector<std::pair<const MachineBasicBlock *, int>, 16> CatchRetSuccessors;
for (const MachineBasicBlock &MBB : MF) {
- if (MBB.isEHFuncletEntry()) {
- FuncletBlocks.push_back(&MBB);
+ if (MBB.isEHScopeEntry()) {
+ EHScopeBlocks.push_back(&MBB);
} else if (IsSEH && MBB.isEHPad()) {
SEHCatchPads.push_back(&MBB);
} else if (MBB.pred_empty()) {
@@ -686,8 +686,8 @@ llvm::getFuncletMembership(const MachineFunction &MF) {
MachineBasicBlock::const_iterator MBBI = MBB.getFirstTerminator();
- // CatchPads are not funclets for SEH so do not consider CatchRet to
- // transfer control to another funclet.
+ // CatchPads are not scopes for SEH so do not consider CatchRet to
+ // transfer control to another scope.
if (MBBI == MBB.end() || MBBI->getOpcode() != TII->getCatchReturnOpcode())
continue;
@@ -700,24 +700,24 @@ llvm::getFuncletMembership(const MachineFunction &MF) {
}
// We don't have anything to do if there aren't any EH pads.
- if (FuncletBlocks.empty())
- return FuncletMembership;
+ if (EHScopeBlocks.empty())
+ return EHScopeMembership;
// Identify all the basic blocks reachable from the function entry.
- collectFuncletMembers(FuncletMembership, EntryBBNumber, &MF.front());
- // All blocks not part of a funclet are in the parent function.
+ collectEHScopeMembers(EHScopeMembership, EntryBBNumber, &MF.front());
+ // All blocks not part of a scope are in the parent function.
for (const MachineBasicBlock *MBB : UnreachableBlocks)
- collectFuncletMembers(FuncletMembership, EntryBBNumber, MBB);
- // Next, identify all the blocks inside the funclets.
- for (const MachineBasicBlock *MBB : FuncletBlocks)
- collectFuncletMembers(FuncletMembership, MBB->getNumber(), MBB);
- // SEH CatchPads aren't really funclets, handle them separately.
+ collectEHScopeMembers(EHScopeMembership, EntryBBNumber, MBB);
+ // Next, identify all the blocks inside the scopes.
+ for (const MachineBasicBlock *MBB : EHScopeBlocks)
+ collectEHScopeMembers(EHScopeMembership, MBB->getNumber(), MBB);
+ // SEH CatchPads aren't really scopes, handle them separately.
for (const MachineBasicBlock *MBB : SEHCatchPads)
- collectFuncletMembers(FuncletMembership, EntryBBNumber, MBB);
+ collectEHScopeMembers(EHScopeMembership, EntryBBNumber, MBB);
// Finally, identify all the targets of a catchret.
for (std::pair<const MachineBasicBlock *, int> CatchRetPair :
CatchRetSuccessors)
- collectFuncletMembers(FuncletMembership, CatchRetPair.second,
+ collectEHScopeMembers(EHScopeMembership, CatchRetPair.second,
CatchRetPair.first);
- return FuncletMembership;
+ return EHScopeMembership;
}
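
The Analysis.cpp hunks above rename funclet membership to EH-scope membership; the traversal itself is unchanged: a worklist walk tags every reachable block with a scope id, refuses to cross into blocks that start a new scope, skips blocks already tagged, and stops at returns. A standalone sketch of that shape (illustrative Node type and field names, not LLVM's MachineBasicBlock API):

#include <cassert>
#include <map>
#include <vector>

struct Node {
  bool IsEHPad = false;  // starts a new scope
  bool IsReturn = false; // scope transfer boundary
  std::vector<const Node *> Succs;
};

static void collectScopeMembers(std::map<const Node *, int> &Membership,
                                int Scope, const Node *Entry) {
  std::vector<const Node *> Worklist = {Entry};
  while (!Worklist.empty()) {
    const Node *Visiting = Worklist.back();
    Worklist.pop_back();
    // Don't follow blocks which start new scopes.
    if (Visiting->IsEHPad && Visiting != Entry)
      continue;
    // Tag the block; don't revisit blocks that already have a scope.
    auto P = Membership.insert({Visiting, Scope});
    if (!P.second) {
      assert(P.first->second == Scope && "node is part of two scopes!");
      continue;
    }
    // Returns are boundaries where scope transfer can occur.
    if (Visiting->IsReturn)
      continue;
    for (const Node *Succ : Visiting->Succs)
      Worklist.push_back(Succ);
  }
}

getEHScopeMembership then seeds this walk once from the function entry, once per unreachable block, once per scope-entry block, and once per catchret target, exactly as the hunks above show.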
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
index 15cfbd5c40ff..9011f025f595 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -91,7 +91,8 @@ void ARMException::endFunction(const MachineFunction *MF) {
ATS.emitFnEnd();
}
-void ARMException::emitTypeInfos(unsigned TTypeEncoding) {
+void ARMException::emitTypeInfos(unsigned TTypeEncoding,
+ MCSymbol *TTBaseLabel) {
const MachineFunction *MF = Asm->MF;
const std::vector<const GlobalValue *> &TypeInfos = MF->getTypeInfos();
const std::vector<unsigned> &FilterIds = MF->getFilterIds();
@@ -112,6 +113,8 @@ void ARMException::emitTypeInfos(unsigned TTypeEncoding) {
Asm->EmitTTypeReference(GV, TTypeEncoding);
}
+ Asm->OutStreamer->EmitLabel(TTBaseLabel);
+
// Emit the Exception Specifications.
if (VerboseAsm && !FilterIds.empty()) {
Asm->OutStreamer->AddComment(">> Filter TypeInfos <<");
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
new file mode 100644
index 000000000000..20b0b8d3feab
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -0,0 +1,721 @@
+//===- llvm/CodeGen/AsmPrinter/AccelTable.cpp - Accelerator Tables --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing accelerator tables.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/AccelTable.h"
+#include "DwarfCompileUnit.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DIE.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+#include <vector>
+
+using namespace llvm;
+
+void AccelTableBase::computeBucketCount() {
+ // First get the number of unique hashes.
+ std::vector<uint32_t> Uniques;
+ Uniques.reserve(Entries.size());
+ for (const auto &E : Entries)
+ Uniques.push_back(E.second.HashValue);
+ array_pod_sort(Uniques.begin(), Uniques.end());
+ std::vector<uint32_t>::iterator P =
+ std::unique(Uniques.begin(), Uniques.end());
+
+ UniqueHashCount = std::distance(Uniques.begin(), P);
+
+ if (UniqueHashCount > 1024)
+ BucketCount = UniqueHashCount / 4;
+ else if (UniqueHashCount > 16)
+ BucketCount = UniqueHashCount / 2;
+ else
+ BucketCount = std::max<uint32_t>(UniqueHashCount, 1);
+}
+
+void AccelTableBase::finalize(AsmPrinter *Asm, StringRef Prefix) {
+ // Create the individual hash data outputs.
+ for (auto &E : Entries) {
+ // Unique the entries.
+ std::stable_sort(E.second.Values.begin(), E.second.Values.end(),
+ [](const AccelTableData *A, const AccelTableData *B) {
+ return *A < *B;
+ });
+ E.second.Values.erase(
+ std::unique(E.second.Values.begin(), E.second.Values.end()),
+ E.second.Values.end());
+ }
+
+ // Figure out how many buckets we need, then compute the bucket contents and
+ // the final ordering. The hashes and offsets can be emitted by walking these
+ // data structures. We add temporary symbols to the data so they can be
+ // referenced when emitting the offsets.
+ computeBucketCount();
+
+ // Compute bucket contents and final ordering.
+ Buckets.resize(BucketCount);
+ for (auto &E : Entries) {
+ uint32_t Bucket = E.second.HashValue % BucketCount;
+ Buckets[Bucket].push_back(&E.second);
+ E.second.Sym = Asm->createTempSymbol(Prefix);
+ }
+
+ // Sort the contents of the buckets by hash value so that hash collisions end
+ // up together. Stable sort makes testing easier and doesn't cost much more.
+ for (auto &Bucket : Buckets)
+ std::stable_sort(Bucket.begin(), Bucket.end(),
+ [](HashData *LHS, HashData *RHS) {
+ return LHS->HashValue < RHS->HashValue;
+ });
+}
+
+namespace {
+/// Base class for writing out Accelerator tables. It holds the common
+/// functionality for the two Accelerator table types.
+class AccelTableWriter {
+protected:
+ AsmPrinter *const Asm; ///< Destination.
+ const AccelTableBase &Contents; ///< Data to emit.
+
+ /// Controls whether to emit duplicate hash and offset table entries for names
+ /// with identical hashes. Apple tables don't emit duplicate entries, DWARF v5
+ /// tables do.
+ const bool SkipIdenticalHashes;
+
+ void emitHashes() const;
+
+ /// Emit offsets to lists of entries with identical names. The offsets are
+ /// relative to the Base argument.
+ void emitOffsets(const MCSymbol *Base) const;
+
+public:
+ AccelTableWriter(AsmPrinter *Asm, const AccelTableBase &Contents,
+ bool SkipIdenticalHashes)
+ : Asm(Asm), Contents(Contents), SkipIdenticalHashes(SkipIdenticalHashes) {
+ }
+};
+
+class AppleAccelTableWriter : public AccelTableWriter {
+ using Atom = AppleAccelTableData::Atom;
+
+ /// The fixed header of an Apple Accelerator Table.
+ struct Header {
+ uint32_t Magic = MagicHash;
+ uint16_t Version = 1;
+ uint16_t HashFunction = dwarf::DW_hash_function_djb;
+ uint32_t BucketCount;
+ uint32_t HashCount;
+ uint32_t HeaderDataLength;
+
+ /// 'HASH' magic value to detect endianness.
+ static const uint32_t MagicHash = 0x48415348;
+
+ Header(uint32_t BucketCount, uint32_t UniqueHashCount, uint32_t DataLength)
+ : BucketCount(BucketCount), HashCount(UniqueHashCount),
+ HeaderDataLength(DataLength) {}
+
+ void emit(AsmPrinter *Asm) const;
+#ifndef NDEBUG
+ void print(raw_ostream &OS) const;
+ void dump() const { print(dbgs()); }
+#endif
+ };
+
+ /// The HeaderData describes the structure of an Apple accelerator table
+ /// through a list of Atoms.
+ struct HeaderData {
+ /// In the case of data that is referenced via DW_FORM_ref_* the offset
+ /// base is used to describe the offset for all forms in the list of atoms.
+ uint32_t DieOffsetBase;
+
+ const SmallVector<Atom, 4> Atoms;
+
+ HeaderData(ArrayRef<Atom> AtomList, uint32_t Offset = 0)
+ : DieOffsetBase(Offset), Atoms(AtomList.begin(), AtomList.end()) {}
+
+ void emit(AsmPrinter *Asm) const;
+#ifndef NDEBUG
+ void print(raw_ostream &OS) const;
+ void dump() const { print(dbgs()); }
+#endif
+ };
+
+ Header Header;
+ HeaderData HeaderData;
+ const MCSymbol *SecBegin;
+
+ void emitBuckets() const;
+ void emitData() const;
+
+public:
+ AppleAccelTableWriter(AsmPrinter *Asm, const AccelTableBase &Contents,
+ ArrayRef<Atom> Atoms, const MCSymbol *SecBegin)
+ : AccelTableWriter(Asm, Contents, true),
+ Header(Contents.getBucketCount(), Contents.getUniqueHashCount(),
+ 8 + (Atoms.size() * 4)),
+ HeaderData(Atoms), SecBegin(SecBegin) {}
+
+ void emit() const;
+
+#ifndef NDEBUG
+ void print(raw_ostream &OS) const;
+ void dump() const { print(dbgs()); }
+#endif
+};
+
+/// Class responsible for emitting a DWARF v5 Accelerator Table. The only
+/// public function is emit(), which performs the actual emission.
+///
+/// The class is templated in its data type. This allows us to emit both dynamic
+/// and static data entries. A callback abstracts the logic to provide a CU
+/// index for a given entry, which is different per data type, but identical
+/// for every entry in the same table.
+template <typename DataT>
+class Dwarf5AccelTableWriter : public AccelTableWriter {
+ struct Header {
+ uint32_t UnitLength = 0;
+ uint16_t Version = 5;
+ uint16_t Padding = 0;
+ uint32_t CompUnitCount;
+ uint32_t LocalTypeUnitCount = 0;
+ uint32_t ForeignTypeUnitCount = 0;
+ uint32_t BucketCount;
+ uint32_t NameCount;
+ uint32_t AbbrevTableSize = 0;
+ uint32_t AugmentationStringSize = sizeof(AugmentationString);
+ char AugmentationString[8] = {'L', 'L', 'V', 'M', '0', '7', '0', '0'};
+
+ Header(uint32_t CompUnitCount, uint32_t BucketCount, uint32_t NameCount)
+ : CompUnitCount(CompUnitCount), BucketCount(BucketCount),
+ NameCount(NameCount) {}
+
+ void emit(const Dwarf5AccelTableWriter &Ctx) const;
+ };
+ struct AttributeEncoding {
+ dwarf::Index Index;
+ dwarf::Form Form;
+ };
+
+ Header Header;
+ DenseMap<uint32_t, SmallVector<AttributeEncoding, 2>> Abbreviations;
+ ArrayRef<MCSymbol *> CompUnits;
+ llvm::function_ref<unsigned(const DataT &)> getCUIndexForEntry;
+ MCSymbol *ContributionStart = Asm->createTempSymbol("names_start");
+ MCSymbol *ContributionEnd = Asm->createTempSymbol("names_end");
+ MCSymbol *AbbrevStart = Asm->createTempSymbol("names_abbrev_start");
+ MCSymbol *AbbrevEnd = Asm->createTempSymbol("names_abbrev_end");
+ MCSymbol *EntryPool = Asm->createTempSymbol("names_entries");
+
+ DenseSet<uint32_t> getUniqueTags() const;
+
+ // Right now, we emit uniform attributes for all tags.
+ SmallVector<AttributeEncoding, 2> getUniformAttributes() const;
+
+ void emitCUList() const;
+ void emitBuckets() const;
+ void emitStringOffsets() const;
+ void emitAbbrevs() const;
+ void emitEntry(const DataT &Entry) const;
+ void emitData() const;
+
+public:
+ Dwarf5AccelTableWriter(
+ AsmPrinter *Asm, const AccelTableBase &Contents,
+ ArrayRef<MCSymbol *> CompUnits,
+ llvm::function_ref<unsigned(const DataT &)> GetCUIndexForEntry);
+
+ void emit() const;
+};
+} // namespace
+
+void AccelTableWriter::emitHashes() const {
+ uint64_t PrevHash = std::numeric_limits<uint64_t>::max();
+ unsigned BucketIdx = 0;
+ for (auto &Bucket : Contents.getBuckets()) {
+ for (auto &Hash : Bucket) {
+ uint32_t HashValue = Hash->HashValue;
+ if (SkipIdenticalHashes && PrevHash == HashValue)
+ continue;
+ Asm->OutStreamer->AddComment("Hash in Bucket " + Twine(BucketIdx));
+ Asm->emitInt32(HashValue);
+ PrevHash = HashValue;
+ }
+ BucketIdx++;
+ }
+}
+
+void AccelTableWriter::emitOffsets(const MCSymbol *Base) const {
+ const auto &Buckets = Contents.getBuckets();
+ uint64_t PrevHash = std::numeric_limits<uint64_t>::max();
+ for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
+ for (auto *Hash : Buckets[i]) {
+ uint32_t HashValue = Hash->HashValue;
+ if (SkipIdenticalHashes && PrevHash == HashValue)
+ continue;
+ PrevHash = HashValue;
+ Asm->OutStreamer->AddComment("Offset in Bucket " + Twine(i));
+ Asm->EmitLabelDifference(Hash->Sym, Base, sizeof(uint32_t));
+ }
+ }
+}
+
+void AppleAccelTableWriter::Header::emit(AsmPrinter *Asm) const {
+ Asm->OutStreamer->AddComment("Header Magic");
+ Asm->emitInt32(Magic);
+ Asm->OutStreamer->AddComment("Header Version");
+ Asm->emitInt16(Version);
+ Asm->OutStreamer->AddComment("Header Hash Function");
+ Asm->emitInt16(HashFunction);
+ Asm->OutStreamer->AddComment("Header Bucket Count");
+ Asm->emitInt32(BucketCount);
+ Asm->OutStreamer->AddComment("Header Hash Count");
+ Asm->emitInt32(HashCount);
+ Asm->OutStreamer->AddComment("Header Data Length");
+ Asm->emitInt32(HeaderDataLength);
+}
+
+void AppleAccelTableWriter::HeaderData::emit(AsmPrinter *Asm) const {
+ Asm->OutStreamer->AddComment("HeaderData Die Offset Base");
+ Asm->emitInt32(DieOffsetBase);
+ Asm->OutStreamer->AddComment("HeaderData Atom Count");
+ Asm->emitInt32(Atoms.size());
+
+ for (const Atom &A : Atoms) {
+ Asm->OutStreamer->AddComment(dwarf::AtomTypeString(A.Type));
+ Asm->emitInt16(A.Type);
+ Asm->OutStreamer->AddComment(dwarf::FormEncodingString(A.Form));
+ Asm->emitInt16(A.Form);
+ }
+}
+
+void AppleAccelTableWriter::emitBuckets() const {
+ const auto &Buckets = Contents.getBuckets();
+ unsigned index = 0;
+ for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
+ Asm->OutStreamer->AddComment("Bucket " + Twine(i));
+ if (!Buckets[i].empty())
+ Asm->emitInt32(index);
+ else
+ Asm->emitInt32(std::numeric_limits<uint32_t>::max());
+ // Buckets point in the list of hashes, not to the data. Do not increment
+ // the index multiple times in case of hash collisions.
+ uint64_t PrevHash = std::numeric_limits<uint64_t>::max();
+ for (auto *HD : Buckets[i]) {
+ uint32_t HashValue = HD->HashValue;
+ if (PrevHash != HashValue)
+ ++index;
+ PrevHash = HashValue;
+ }
+ }
+}
+
+void AppleAccelTableWriter::emitData() const {
+ const auto &Buckets = Contents.getBuckets();
+ for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
+ uint64_t PrevHash = std::numeric_limits<uint64_t>::max();
+ for (auto &Hash : Buckets[i]) {
+ // Terminate the previous entry if there is no hash collision with the
+ // current one.
+ if (PrevHash != std::numeric_limits<uint64_t>::max() &&
+ PrevHash != Hash->HashValue)
+ Asm->emitInt32(0);
+ // Remember to emit the label for our offset.
+ Asm->OutStreamer->EmitLabel(Hash->Sym);
+ Asm->OutStreamer->AddComment(Hash->Name.getString());
+ Asm->emitDwarfStringOffset(Hash->Name);
+ Asm->OutStreamer->AddComment("Num DIEs");
+ Asm->emitInt32(Hash->Values.size());
+ for (const auto *V : Hash->Values)
+ static_cast<const AppleAccelTableData *>(V)->emit(Asm);
+ PrevHash = Hash->HashValue;
+ }
+ // Emit the final end marker for the bucket.
+ if (!Buckets[i].empty())
+ Asm->emitInt32(0);
+ }
+}
+
+void AppleAccelTableWriter::emit() const {
+ Header.emit(Asm);
+ HeaderData.emit(Asm);
+ emitBuckets();
+ emitHashes();
+ emitOffsets(SecBegin);
+ emitData();
+}
+
+template <typename DataT>
+void Dwarf5AccelTableWriter<DataT>::Header::emit(
+ const Dwarf5AccelTableWriter &Ctx) const {
+ assert(CompUnitCount > 0 && "Index must have at least one CU.");
+
+ AsmPrinter *Asm = Ctx.Asm;
+ Asm->OutStreamer->AddComment("Header: unit length");
+ Asm->EmitLabelDifference(Ctx.ContributionEnd, Ctx.ContributionStart,
+ sizeof(uint32_t));
+ Asm->OutStreamer->EmitLabel(Ctx.ContributionStart);
+ Asm->OutStreamer->AddComment("Header: version");
+ Asm->emitInt16(Version);
+ Asm->OutStreamer->AddComment("Header: padding");
+ Asm->emitInt16(Padding);
+ Asm->OutStreamer->AddComment("Header: compilation unit count");
+ Asm->emitInt32(CompUnitCount);
+ Asm->OutStreamer->AddComment("Header: local type unit count");
+ Asm->emitInt32(LocalTypeUnitCount);
+ Asm->OutStreamer->AddComment("Header: foreign type unit count");
+ Asm->emitInt32(ForeignTypeUnitCount);
+ Asm->OutStreamer->AddComment("Header: bucket count");
+ Asm->emitInt32(BucketCount);
+ Asm->OutStreamer->AddComment("Header: name count");
+ Asm->emitInt32(NameCount);
+ Asm->OutStreamer->AddComment("Header: abbreviation table size");
+ Asm->EmitLabelDifference(Ctx.AbbrevEnd, Ctx.AbbrevStart, sizeof(uint32_t));
+ Asm->OutStreamer->AddComment("Header: augmentation string size");
+ assert(AugmentationStringSize % 4 == 0);
+ Asm->emitInt32(AugmentationStringSize);
+ Asm->OutStreamer->AddComment("Header: augmentation string");
+ Asm->OutStreamer->EmitBytes({AugmentationString, AugmentationStringSize});
+}
+
+template <typename DataT>
+DenseSet<uint32_t> Dwarf5AccelTableWriter<DataT>::getUniqueTags() const {
+ DenseSet<uint32_t> UniqueTags;
+ for (auto &Bucket : Contents.getBuckets()) {
+ for (auto *Hash : Bucket) {
+ for (auto *Value : Hash->Values) {
+ unsigned Tag = static_cast<const DataT *>(Value)->getDieTag();
+ UniqueTags.insert(Tag);
+ }
+ }
+ }
+ return UniqueTags;
+}
+
+template <typename DataT>
+SmallVector<typename Dwarf5AccelTableWriter<DataT>::AttributeEncoding, 2>
+Dwarf5AccelTableWriter<DataT>::getUniformAttributes() const {
+ SmallVector<AttributeEncoding, 2> UA;
+ if (CompUnits.size() > 1) {
+ size_t LargestCUIndex = CompUnits.size() - 1;
+ dwarf::Form Form = DIEInteger::BestForm(/*IsSigned*/ false, LargestCUIndex);
+ UA.push_back({dwarf::DW_IDX_compile_unit, Form});
+ }
+ UA.push_back({dwarf::DW_IDX_die_offset, dwarf::DW_FORM_ref4});
+ return UA;
+}
+
+template <typename DataT>
+void Dwarf5AccelTableWriter<DataT>::emitCUList() const {
+ for (const auto &CU : enumerate(CompUnits)) {
+ Asm->OutStreamer->AddComment("Compilation unit " + Twine(CU.index()));
+ Asm->emitDwarfSymbolReference(CU.value());
+ }
+}
+
+template <typename DataT>
+void Dwarf5AccelTableWriter<DataT>::emitBuckets() const {
+ uint32_t Index = 1;
+ for (const auto &Bucket : enumerate(Contents.getBuckets())) {
+ Asm->OutStreamer->AddComment("Bucket " + Twine(Bucket.index()));
+ Asm->emitInt32(Bucket.value().empty() ? 0 : Index);
+ Index += Bucket.value().size();
+ }
+}
+
+template <typename DataT>
+void Dwarf5AccelTableWriter<DataT>::emitStringOffsets() const {
+ for (const auto &Bucket : enumerate(Contents.getBuckets())) {
+ for (auto *Hash : Bucket.value()) {
+ DwarfStringPoolEntryRef String = Hash->Name;
+ Asm->OutStreamer->AddComment("String in Bucket " + Twine(Bucket.index()) +
+ ": " + String.getString());
+ Asm->emitDwarfStringOffset(String);
+ }
+ }
+}
+
+template <typename DataT>
+void Dwarf5AccelTableWriter<DataT>::emitAbbrevs() const {
+ Asm->OutStreamer->EmitLabel(AbbrevStart);
+ for (const auto &Abbrev : Abbreviations) {
+ Asm->OutStreamer->AddComment("Abbrev code");
+ assert(Abbrev.first != 0);
+ Asm->EmitULEB128(Abbrev.first);
+ Asm->OutStreamer->AddComment(dwarf::TagString(Abbrev.first));
+ Asm->EmitULEB128(Abbrev.first);
+ for (const auto &AttrEnc : Abbrev.second) {
+ Asm->EmitULEB128(AttrEnc.Index, dwarf::IndexString(AttrEnc.Index).data());
+ Asm->EmitULEB128(AttrEnc.Form,
+ dwarf::FormEncodingString(AttrEnc.Form).data());
+ }
+ Asm->EmitULEB128(0, "End of abbrev");
+ Asm->EmitULEB128(0, "End of abbrev");
+ }
+ Asm->EmitULEB128(0, "End of abbrev list");
+ Asm->OutStreamer->EmitLabel(AbbrevEnd);
+}
+
+template <typename DataT>
+void Dwarf5AccelTableWriter<DataT>::emitEntry(const DataT &Entry) const {
+ auto AbbrevIt = Abbreviations.find(Entry.getDieTag());
+ assert(AbbrevIt != Abbreviations.end() &&
+ "Why wasn't this abbrev generated?");
+
+ Asm->EmitULEB128(AbbrevIt->first, "Abbreviation code");
+ for (const auto &AttrEnc : AbbrevIt->second) {
+ Asm->OutStreamer->AddComment(dwarf::IndexString(AttrEnc.Index));
+ switch (AttrEnc.Index) {
+ case dwarf::DW_IDX_compile_unit: {
+ DIEInteger ID(getCUIndexForEntry(Entry));
+ ID.EmitValue(Asm, AttrEnc.Form);
+ break;
+ }
+ case dwarf::DW_IDX_die_offset:
+ assert(AttrEnc.Form == dwarf::DW_FORM_ref4);
+ Asm->emitInt32(Entry.getDieOffset());
+ break;
+ default:
+ llvm_unreachable("Unexpected index attribute!");
+ }
+ }
+}
+
+template <typename DataT> void Dwarf5AccelTableWriter<DataT>::emitData() const {
+ Asm->OutStreamer->EmitLabel(EntryPool);
+ for (auto &Bucket : Contents.getBuckets()) {
+ for (auto *Hash : Bucket) {
+ // Remember to emit the label for our offset.
+ Asm->OutStreamer->EmitLabel(Hash->Sym);
+ for (const auto *Value : Hash->Values)
+ emitEntry(*static_cast<const DataT *>(Value));
+ Asm->OutStreamer->AddComment("End of list: " + Hash->Name.getString());
+ Asm->emitInt32(0);
+ }
+ }
+}
+
+template <typename DataT>
+Dwarf5AccelTableWriter<DataT>::Dwarf5AccelTableWriter(
+ AsmPrinter *Asm, const AccelTableBase &Contents,
+ ArrayRef<MCSymbol *> CompUnits,
+ llvm::function_ref<unsigned(const DataT &)> getCUIndexForEntry)
+ : AccelTableWriter(Asm, Contents, false),
+ Header(CompUnits.size(), Contents.getBucketCount(),
+ Contents.getUniqueNameCount()),
+ CompUnits(CompUnits), getCUIndexForEntry(std::move(getCUIndexForEntry)) {
+ DenseSet<uint32_t> UniqueTags = getUniqueTags();
+ SmallVector<AttributeEncoding, 2> UniformAttributes = getUniformAttributes();
+
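+  // All entries share the same uniform attribute layout, so one abbreviation
+  // per unique DIE tag is sufficient.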
+ Abbreviations.reserve(UniqueTags.size());
+ for (uint32_t Tag : UniqueTags)
+ Abbreviations.try_emplace(Tag, UniformAttributes);
+}
+
+template <typename DataT> void Dwarf5AccelTableWriter<DataT>::emit() const {
+ Header.emit(*this);
+ emitCUList();
+ emitBuckets();
+ emitHashes();
+ emitStringOffsets();
+ emitOffsets(EntryPool);
+ emitAbbrevs();
+ emitData();
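+  // Pad the contribution to a four-byte boundary before placing the end label.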
+ Asm->OutStreamer->EmitValueToAlignment(4, 0);
+ Asm->OutStreamer->EmitLabel(ContributionEnd);
+}
+
+void llvm::emitAppleAccelTableImpl(AsmPrinter *Asm, AccelTableBase &Contents,
+ StringRef Prefix, const MCSymbol *SecBegin,
+ ArrayRef<AppleAccelTableData::Atom> Atoms) {
+ Contents.finalize(Asm, Prefix);
+ AppleAccelTableWriter(Asm, Contents, Atoms, SecBegin).emit();
+}
+
+void llvm::emitDWARF5AccelTable(
+ AsmPrinter *Asm, AccelTable<DWARF5AccelTableData> &Contents,
+ const DwarfDebug &DD, ArrayRef<std::unique_ptr<DwarfCompileUnit>> CUs) {
+ std::vector<MCSymbol *> CompUnits;
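+  // The order of the CU labels defines the CU indices referenced via
+  // DW_IDX_compile_unit, so it must line up with the units' unique IDs.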
+ for (const auto &CU : enumerate(CUs)) {
+ assert(CU.index() == CU.value()->getUniqueID());
+ const DwarfCompileUnit *MainCU =
+ DD.useSplitDwarf() ? CU.value()->getSkeleton() : CU.value().get();
+ CompUnits.push_back(MainCU->getLabelBegin());
+ }
+
+ Contents.finalize(Asm, "names");
+ Dwarf5AccelTableWriter<DWARF5AccelTableData>(
+ Asm, Contents, CompUnits,
+ [&DD](const DWARF5AccelTableData &Entry) {
+ const DIE *CUDie = Entry.getDie().getUnitDie();
+ return DD.lookupCU(CUDie)->getUniqueID();
+ })
+ .emit();
+}
+
+void llvm::emitDWARF5AccelTable(
+ AsmPrinter *Asm, AccelTable<DWARF5AccelTableStaticData> &Contents,
+ ArrayRef<MCSymbol *> CUs,
+ llvm::function_ref<unsigned(const DWARF5AccelTableStaticData &)>
+ getCUIndexForEntry) {
+ Contents.finalize(Asm, "names");
+ Dwarf5AccelTableWriter<DWARF5AccelTableStaticData>(Asm, Contents, CUs,
+ getCUIndexForEntry)
+ .emit();
+}
+
+void AppleAccelTableOffsetData::emit(AsmPrinter *Asm) const {
+ Asm->emitInt32(Die.getDebugSectionOffset());
+}
+
+void AppleAccelTableTypeData::emit(AsmPrinter *Asm) const {
+ Asm->emitInt32(Die.getDebugSectionOffset());
+ Asm->emitInt16(Die.getTag());
+ Asm->emitInt8(0);
+}
+
+void AppleAccelTableStaticOffsetData::emit(AsmPrinter *Asm) const {
+ Asm->emitInt32(Offset);
+}
+
+void AppleAccelTableStaticTypeData::emit(AsmPrinter *Asm) const {
+ Asm->emitInt32(Offset);
+ Asm->emitInt16(Tag);
+ Asm->emitInt8(ObjCClassIsImplementation ? dwarf::DW_FLAG_type_implementation
+ : 0);
+ Asm->emitInt32(QualifiedNameHash);
+}
+
+#ifndef _MSC_VER
+// The lines below are rejected by older versions (TBD) of MSVC.
+constexpr AppleAccelTableData::Atom AppleAccelTableTypeData::Atoms[];
+constexpr AppleAccelTableData::Atom AppleAccelTableOffsetData::Atoms[];
+constexpr AppleAccelTableData::Atom AppleAccelTableStaticOffsetData::Atoms[];
+constexpr AppleAccelTableData::Atom AppleAccelTableStaticTypeData::Atoms[];
+#else
+// FIXME: Erase this path once the minimum MSVC version has been bumped.
+const SmallVector<AppleAccelTableData::Atom, 4>
+ AppleAccelTableOffsetData::Atoms = {
+ Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)};
+const SmallVector<AppleAccelTableData::Atom, 4> AppleAccelTableTypeData::Atoms =
+ {Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
+ Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
+ Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)};
+const SmallVector<AppleAccelTableData::Atom, 4>
+ AppleAccelTableStaticOffsetData::Atoms = {
+ Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)};
+const SmallVector<AppleAccelTableData::Atom, 4>
+ AppleAccelTableStaticTypeData::Atoms = {
+ Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
+ Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
+ Atom(5, dwarf::DW_FORM_data1), Atom(6, dwarf::DW_FORM_data4)};
+#endif
+
+#ifndef NDEBUG
+void AppleAccelTableWriter::Header::print(raw_ostream &OS) const {
+ OS << "Magic: " << format("0x%x", Magic) << "\n"
+ << "Version: " << Version << "\n"
+ << "Hash Function: " << HashFunction << "\n"
+ << "Bucket Count: " << BucketCount << "\n"
+ << "Header Data Length: " << HeaderDataLength << "\n";
+}
+
+void AppleAccelTableData::Atom::print(raw_ostream &OS) const {
+ OS << "Type: " << dwarf::AtomTypeString(Type) << "\n"
+ << "Form: " << dwarf::FormEncodingString(Form) << "\n";
+}
+
+void AppleAccelTableWriter::HeaderData::print(raw_ostream &OS) const {
+ OS << "DIE Offset Base: " << DieOffsetBase << "\n";
+ for (auto Atom : Atoms)
+ Atom.print(OS);
+}
+
+void AppleAccelTableWriter::print(raw_ostream &OS) const {
+ Header.print(OS);
+ HeaderData.print(OS);
+ Contents.print(OS);
+ SecBegin->print(OS, nullptr);
+}
+
+void AccelTableBase::HashData::print(raw_ostream &OS) const {
+ OS << "Name: " << Name.getString() << "\n";
+ OS << " Hash Value: " << format("0x%x", HashValue) << "\n";
+ OS << " Symbol: ";
+ if (Sym)
+ OS << *Sym;
+ else
+ OS << "<none>";
+ OS << "\n";
+ for (auto *Value : Values)
+ Value->print(OS);
+}
+
+void AccelTableBase::print(raw_ostream &OS) const {
+ // Print Content.
+ OS << "Entries: \n";
+ for (const auto &Entry : Entries) {
+ OS << "Name: " << Entry.first() << "\n";
+ for (auto *V : Entry.second.Values)
+ V->print(OS);
+ }
+
+ OS << "Buckets and Hashes: \n";
+ for (auto &Bucket : Buckets)
+ for (auto &Hash : Bucket)
+ Hash->print(OS);
+
+ OS << "Data: \n";
+ for (auto &E : Entries)
+ E.second.print(OS);
+}
+
+void DWARF5AccelTableData::print(raw_ostream &OS) const {
+ OS << " Offset: " << getDieOffset() << "\n";
+ OS << " Tag: " << dwarf::TagString(getDieTag()) << "\n";
+}
+
+void DWARF5AccelTableStaticData::print(raw_ostream &OS) const {
+ OS << " Offset: " << getDieOffset() << "\n";
+ OS << " Tag: " << dwarf::TagString(getDieTag()) << "\n";
+}
+
+void AppleAccelTableOffsetData::print(raw_ostream &OS) const {
+ OS << " Offset: " << Die.getOffset() << "\n";
+}
+
+void AppleAccelTableTypeData::print(raw_ostream &OS) const {
+ OS << " Offset: " << Die.getOffset() << "\n";
+ OS << " Tag: " << dwarf::TagString(Die.getTag()) << "\n";
+}
+
+void AppleAccelTableStaticOffsetData::print(raw_ostream &OS) const {
+ OS << " Static Offset: " << Offset << "\n";
+}
+
+void AppleAccelTableStaticTypeData::print(raw_ostream &OS) const {
+ OS << " Static Offset: " << Offset << "\n";
+ OS << " QualifiedNameHash: " << format("%x\n", QualifiedNameHash) << "\n";
+ OS << " Tag: " << dwarf::TagString(Tag) << "\n";
+ OS << " ObjCClassIsImplementation: "
+ << (ObjCClassIsImplementation ? "true" : "false");
+ OS << "\n";
+}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
index 59ed0324bdb0..4a226527cb5b 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
@@ -10,9 +10,9 @@
#include "AddressPool.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/TargetLoweringObjectFile.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include <utility>
using namespace llvm;
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h
index 990a158d87cd..5350006bf744 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h
@@ -39,7 +39,7 @@ class AddressPool {
public:
AddressPool() = default;
- /// \brief Returns the index into the address pool with the given
+ /// Returns the index into the address pool with the given
/// label/symbol.
unsigned getIndex(const MCSymbol *Sym, bool TLS = false);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index d7995447592c..9bbc77b3056b 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -16,6 +16,7 @@
#include "CodeViewDebug.h"
#include "DwarfDebug.h"
#include "DwarfException.h"
+#include "WinCFGuard.h"
#include "WinException.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
@@ -30,7 +31,6 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
-#include "llvm/Analysis/ObjectUtils.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/BinaryFormat/ELF.h"
@@ -39,6 +39,7 @@
#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -54,7 +55,6 @@
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
-#include "llvm/CodeGen/TargetLoweringObjectFile.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -87,6 +87,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
@@ -107,6 +108,7 @@
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
@@ -130,6 +132,8 @@ static const char *const DbgTimerName = "emit";
static const char *const DbgTimerDescription = "Debug Info Emission";
static const char *const EHTimerName = "write_exception";
static const char *const EHTimerDescription = "DWARF Exception Writer";
+static const char *const CFGuardName = "Control Flow Guard";
+static const char *const CFGuardDescription = "Control Flow Guard Tables";
static const char *const CodeViewLineTablesGroupName = "linetables";
static const char *const CodeViewLineTablesGroupDescription =
"CodeView Line Tables";
@@ -211,8 +215,10 @@ const DataLayout &AsmPrinter::getDataLayout() const {
}
// Do not use the cached DataLayout because some client use it without a Module
-// (llvm-dsymutil, llvm-dwarfdump).
-unsigned AsmPrinter::getPointerSize() const { return TM.getPointerSize(); }
+// (dsymutil, llvm-dwarfdump).
+unsigned AsmPrinter::getPointerSize() const {
+ return TM.getPointerSize(0); // FIXME: Default address space
+}
const MCSubtargetInfo &AsmPrinter::getSubtargetInfo() const {
assert(MF && "getSubtargetInfo requires a valid MachineFunction!");
@@ -234,7 +240,6 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineModuleInfo>();
AU.addRequired<MachineOptimizationRemarkEmitterPass>();
AU.addRequired<GCModuleInfo>();
- AU.addRequired<MachineLoopInfo>();
}
bool AsmPrinter::doInitialization(Module &M) {
@@ -246,7 +251,7 @@ bool AsmPrinter::doInitialization(Module &M) {
OutStreamer->InitSections(false);
- // Emit the version-min deplyment target directive if needed.
+ // Emit the version-min deployment target directive if needed.
//
// FIXME: If we end up with a collection of these sorts of Darwin-specific
// or ELF-specific things, it may make sense to have a platform helper class
@@ -291,8 +296,7 @@ bool AsmPrinter::doInitialization(Module &M) {
if (MAI->doesSupportDebugInformation()) {
bool EmitCodeView = MMI->getModule()->getCodeViewFlag();
- if (EmitCodeView && (TM.getTargetTriple().isKnownWindowsMSVCEnvironment() ||
- TM.getTargetTriple().isWindowsItaniumEnvironment())) {
+ if (EmitCodeView && TM.getTargetTriple().isOSWindows()) {
Handlers.push_back(HandlerInfo(new CodeViewDebug(this),
DbgTimerName, DbgTimerDescription,
CodeViewLineTablesGroupName,
@@ -350,10 +354,20 @@ bool AsmPrinter::doInitialization(Module &M) {
break;
}
break;
+ case ExceptionHandling::Wasm:
+    // TODO: not implemented; this case avoids an unhandled-enumeration warning.
+ break;
}
if (ES)
Handlers.push_back(HandlerInfo(ES, EHTimerName, EHTimerDescription,
DWARFGroupName, DWARFGroupDescription));
+
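+  // Register the Control Flow Guard handler when the module carries the
+  // "cfguard" module flag.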
+ if (mdconst::extract_or_null<ConstantInt>(
+ MMI->getModule()->getModuleFlag("cfguard")))
+ Handlers.push_back(HandlerInfo(new WinCFGuard(this), CFGuardName,
+ CFGuardDescription, DWARFGroupName,
+ DWARFGroupDescription));
+
return false;
}
@@ -361,7 +375,7 @@ static bool canBeHidden(const GlobalValue *GV, const MCAsmInfo &MAI) {
if (!MAI.hasWeakDefCanBeHiddenDirective())
return false;
- return canBeOmittedFromSymbolTable(GV);
+ return GV->canBeOmittedFromSymbolTable();
}
void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const {
@@ -416,7 +430,7 @@ MCSymbol *AsmPrinter::getSymbol(const GlobalValue *GV) const {
/// EmitGlobalVariable - Emit the specified global variable to the .s file.
void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
- bool IsEmuTLSVar = TM.Options.EmulatedTLS && GV->isThreadLocal();
+ bool IsEmuTLSVar = TM.useEmulatedTLS() && GV->isThreadLocal();
assert(!(IsEmuTLSVar && GV->hasCommonLinkage()) &&
"No emulated TLS variables in the common section");
@@ -898,6 +912,30 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
return true;
}
+/// This method handles the target-independent form of DBG_LABEL, returning
+/// true if it was able to do so. A false return means the target will need
+/// to handle MI in EmitInstruction.
+static bool emitDebugLabelComment(const MachineInstr *MI, AsmPrinter &AP) {
+ if (MI->getNumOperands() != 1)
+ return false;
+
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ OS << "DEBUG_LABEL: ";
+
+ const DILabel *V = MI->getDebugLabel();
+ if (auto *SP = dyn_cast<DISubprogram>(V->getScope())) {
+ StringRef Name = SP->getName();
+ if (!Name.empty())
+ OS << Name << ":";
+ }
+ OS << V->getName();
+
+ // NOTE: Want this comment at start of line, don't emit with AddComment.
+ AP.OutStreamer->emitRawComment(OS.str());
+ return true;
+}
+
AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() const {
if (MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI &&
MF->getFunction().needsUnwindTableEntry())
@@ -952,7 +990,8 @@ void AsmPrinter::emitStackSizeSection(const MachineFunction &MF) {
if (!MF.getTarget().Options.EmitStackSizeSection)
return;
- MCSection *StackSizeSection = getObjFileLowering().getStackSizesSection();
+ MCSection *StackSizeSection =
+ getObjFileLowering().getStackSizesSection(*getCurrentSection());
if (!StackSizeSection)
return;
@@ -964,10 +1003,9 @@ void AsmPrinter::emitStackSizeSection(const MachineFunction &MF) {
OutStreamer->PushSection();
OutStreamer->SwitchSection(StackSizeSection);
- const MCSymbol *FunctionSymbol = getSymbol(&MF.getFunction());
+ const MCSymbol *FunctionSymbol = getFunctionBegin();
uint64_t StackSize = FrameInfo.getStackSize();
- OutStreamer->EmitValue(MCSymbolRefExpr::create(FunctionSymbol, OutContext),
- /* size = */ 8);
+ OutStreamer->EmitSymbolValue(FunctionSymbol, TM.getProgramPointerSize());
OutStreamer->EmitULEB128IntValue(StackSize);
OutStreamer->PopSection();
@@ -996,6 +1034,24 @@ void AsmPrinter::EmitFunctionBody() {
bool ShouldPrintDebugScopes = MMI->hasDebugInfo();
+ if (isVerbose()) {
+ // Get MachineDominatorTree or compute it on the fly if it's unavailable
+ MDT = getAnalysisIfAvailable<MachineDominatorTree>();
+ if (!MDT) {
+ OwnedMDT = make_unique<MachineDominatorTree>();
+ OwnedMDT->getBase().recalculate(*MF);
+ MDT = OwnedMDT.get();
+ }
+
+ // Get MachineLoopInfo or compute it on the fly if it's unavailable
+ MLI = getAnalysisIfAvailable<MachineLoopInfo>();
+ if (!MLI) {
+ OwnedMLI = make_unique<MachineLoopInfo>();
+ OwnedMLI->getBase().analyze(MDT->getBase());
+ MLI = OwnedMLI.get();
+ }
+ }
+
// Print out code for the function.
bool HasAnyRealCode = false;
int NumInstsInFunction = 0;
@@ -1005,7 +1061,7 @@ void AsmPrinter::EmitFunctionBody() {
for (auto &MI : MBB) {
// Print the assembly for the instruction.
if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() &&
- !MI.isDebugValue()) {
+ !MI.isDebugInstr()) {
HasAnyRealCode = true;
++NumInstsInFunction;
}
@@ -1044,6 +1100,12 @@ void AsmPrinter::EmitFunctionBody() {
EmitInstruction(&MI);
}
break;
+ case TargetOpcode::DBG_LABEL:
+ if (isVerbose()) {
+ if (!emitDebugLabelComment(&MI, *this))
+ EmitInstruction(&MI);
+ }
+ break;
case TargetOpcode::IMPLICIT_DEF:
if (isVerbose()) emitImplicitDef(&MI);
break;
@@ -1155,7 +1217,7 @@ void AsmPrinter::EmitFunctionBody() {
OutStreamer->AddBlankLine();
}
-/// \brief Compute the number of Global Variables that uses a Constant.
+/// Compute the number of Global Variables that use a Constant.
static unsigned getNumGlobalVariableUses(const Constant *C) {
if (!C)
return 0;
@@ -1170,7 +1232,7 @@ static unsigned getNumGlobalVariableUses(const Constant *C) {
return NumUses;
}
-/// \brief Only consider global GOT equivalents if at least one user is a
+/// Only consider global GOT equivalents if at least one user is a
/// cstexpr inside an initializer of another global variables. Also, don't
/// handle cstexpr inside instructions. During global variable emission,
/// candidates are skipped and are emitted later in case at least one cstexpr
@@ -1193,7 +1255,7 @@ static bool isGOTEquivalentCandidate(const GlobalVariable *GV,
return NumGOTEquivUsers > 0;
}
-/// \brief Unnamed constant global variables solely contaning a pointer to
+/// Unnamed constant global variables solely containing a pointer to
/// another globals variable is equivalent to a GOT table entry; it contains the
/// the address of another symbol. Optimize it and replace accesses to these
/// "GOT equivalents" by using the GOT entry for the final global instead.
@@ -1214,7 +1276,7 @@ void AsmPrinter::computeGlobalGOTEquivs(Module &M) {
}
}
-/// \brief Constant expressions using GOT equivalent globals may not be eligible
+/// Constant expressions using GOT equivalent globals may not be eligible
/// for PC relative GOT entry conversion, in such cases we need to emit such
/// globals we previously omitted in EmitGlobalVariable.
void AsmPrinter::emitGlobalGOTEquivs() {
@@ -1312,7 +1374,7 @@ bool AsmPrinter::doFinalization(Module &M) {
const TargetLoweringObjectFile &TLOF = getObjFileLowering();
- TLOF.emitModuleMetadata(*OutStreamer, M, TM);
+ TLOF.emitModuleMetadata(*OutStreamer, M);
if (TM.getTargetTriple().isOSBinFormatELF()) {
MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
@@ -1323,6 +1385,7 @@ bool AsmPrinter::doFinalization(Module &M) {
OutStreamer->SwitchSection(TLOF.getDataSection());
const DataLayout &DL = M.getDataLayout();
+ EmitAlignment(Log2_32(DL.getPointerSize()));
for (const auto &Stub : Stubs) {
OutStreamer->EmitLabel(Stub.first);
OutStreamer->EmitSymbolValue(Stub.second.getPointer(),
@@ -1421,6 +1484,61 @@ bool AsmPrinter::doFinalization(Module &M) {
if (MCSection *S = MAI->getNonexecutableStackSection(OutContext))
OutStreamer->SwitchSection(S);
+ if (TM.getTargetTriple().isOSBinFormatCOFF()) {
+ // Emit /EXPORT: flags for each exported global as necessary.
+ const auto &TLOF = getObjFileLowering();
+ std::string Flags;
+
+ for (const GlobalValue &GV : M.global_values()) {
+ raw_string_ostream OS(Flags);
+ TLOF.emitLinkerFlagsForGlobal(OS, &GV);
+ OS.flush();
+ if (!Flags.empty()) {
+ OutStreamer->SwitchSection(TLOF.getDrectveSection());
+ OutStreamer->EmitBytes(Flags);
+ }
+ Flags.clear();
+ }
+
+ // Emit /INCLUDE: flags for each used global as necessary.
+ if (const auto *LU = M.getNamedGlobal("llvm.used")) {
+ assert(LU->hasInitializer() &&
+ "expected llvm.used to have an initializer");
+ assert(isa<ArrayType>(LU->getValueType()) &&
+ "expected llvm.used to be an array type");
+ if (const auto *A = cast<ConstantArray>(LU->getInitializer())) {
+ for (const Value *Op : A->operands()) {
+ const auto *GV =
+ cast<GlobalValue>(Op->stripPointerCastsNoFollowAliases());
+ // Global symbols with internal or private linkage are not visible to
+ // the linker, and thus would cause an error when the linker tried to
+ // preserve the symbol due to the `/include:` directive.
+ if (GV->hasLocalLinkage())
+ continue;
+
+ raw_string_ostream OS(Flags);
+ TLOF.emitLinkerFlagsForUsed(OS, GV);
+ OS.flush();
+
+ if (!Flags.empty()) {
+ OutStreamer->SwitchSection(TLOF.getDrectveSection());
+ OutStreamer->EmitBytes(Flags);
+ }
+ Flags.clear();
+ }
+ }
+ }
+ }
+
+ if (TM.Options.EmitAddrsig) {
+ // Emit address-significance attributes for all globals.
+ OutStreamer->EmitAddrsig();
+ for (const GlobalValue &GV : M.global_values())
+ if (!GV.isThreadLocal() && !GV.getName().startswith("llvm.") &&
+ !GV.hasAtLeastLocalUnnamedAddr())
+ OutStreamer->EmitAddrsigSym(getSymbol(&GV));
+ }
+
// Allow the target to emit any magic that it wants at the end of the file,
// after everything else has gone out.
EmitEndOfAsmFile(M);
@@ -1429,6 +1547,8 @@ bool AsmPrinter::doFinalization(Module &M) {
OutStreamer->Finish();
OutStreamer->reset();
+ OwnedMLI.reset();
+ OwnedMDT.reset();
return false;
}
@@ -1447,14 +1567,14 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
CurrentFnBegin = nullptr;
CurExceptionSym = nullptr;
bool NeedsLocalForSize = MAI->needsLocalForSize();
- if (needFuncLabelsForEHOrDebugInfo(MF, MMI) || NeedsLocalForSize) {
+ if (needFuncLabelsForEHOrDebugInfo(MF, MMI) || NeedsLocalForSize ||
+ MF.getTarget().Options.EmitStackSizeSection) {
CurrentFnBegin = createTempSymbol("func_begin");
if (NeedsLocalForSize)
CurrentFnSymForSize = CurrentFnBegin;
}
ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
- LI = &getAnalysis<MachineLoopInfo>();
const TargetSubtargetInfo &STI = MF.getSubtarget();
EnablePrintSchedInfo = PrintSchedule.getNumOccurrences()
@@ -1842,22 +1962,27 @@ void AsmPrinter::EmitModuleIdents(Module &M) {
// Emission and print routines
//
-/// EmitInt8 - Emit a byte directive and value.
+/// Emit a byte directive and value.
///
-void AsmPrinter::EmitInt8(int Value) const {
+void AsmPrinter::emitInt8(int Value) const {
OutStreamer->EmitIntValue(Value, 1);
}
-/// EmitInt16 - Emit a short directive and value.
-void AsmPrinter::EmitInt16(int Value) const {
+/// Emit a short directive and value.
+void AsmPrinter::emitInt16(int Value) const {
OutStreamer->EmitIntValue(Value, 2);
}
-/// EmitInt32 - Emit a long directive and value.
-void AsmPrinter::EmitInt32(int Value) const {
+/// Emit a long directive and value.
+void AsmPrinter::emitInt32(int Value) const {
OutStreamer->EmitIntValue(Value, 4);
}
+/// Emit a long long directive and value.
+void AsmPrinter::emitInt64(uint64_t Value) const {
+ OutStreamer->EmitIntValue(Value, 8);
+}
+
/// Emit something like ".long Hi-Lo" where the size in bytes of the directive
/// is specified by Size and Hi/Lo specify the labels. This implicitly uses
/// .set if it avoids relocations.
@@ -2069,6 +2194,7 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *C,
uint64_t Offset = 0);
static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP);
+static void emitGlobalConstantFP(APFloat APF, Type *ET, AsmPrinter &AP);
/// isRepeatedByteSequence - Determine whether the given value is
/// composed of a repeated sequence of identical bytes and return the
@@ -2146,13 +2272,15 @@ static void emitGlobalConstantDataSequential(const DataLayout &DL,
ElementByteSize);
}
} else {
+ Type *ET = CDS->getElementType();
for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I)
- emitGlobalConstantFP(cast<ConstantFP>(CDS->getElementAsConstant(I)), AP);
+ emitGlobalConstantFP(CDS->getElementAsAPFloat(I), ET, AP);
}
unsigned Size = DL.getTypeAllocSize(CDS->getType());
unsigned EmittedSize = DL.getTypeAllocSize(CDS->getType()->getElementType()) *
CDS->getNumElements();
+ assert(EmittedSize <= Size && "Size cannot be less than EmittedSize!");
if (unsigned Padding = Size - EmittedSize)
AP.OutStreamer->EmitZeros(Padding);
}
@@ -2216,17 +2344,17 @@ static void emitGlobalConstantStruct(const DataLayout &DL,
"Layout of constant struct may be incorrect!");
}
-static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) {
- APInt API = CFP->getValueAPF().bitcastToAPInt();
+static void emitGlobalConstantFP(APFloat APF, Type *ET, AsmPrinter &AP) {
+ APInt API = APF.bitcastToAPInt();
// First print a comment with what we think the original floating-point value
// should have been.
if (AP.isVerbose()) {
SmallString<8> StrVal;
- CFP->getValueAPF().toString(StrVal);
+ APF.toString(StrVal);
- if (CFP->getType())
- CFP->getType()->print(AP.OutStreamer->GetCommentOS());
+ if (ET)
+ ET->print(AP.OutStreamer->GetCommentOS());
else
AP.OutStreamer->GetCommentOS() << "Printing <null> Type";
AP.OutStreamer->GetCommentOS() << ' ' << StrVal << '\n';
@@ -2241,7 +2369,7 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) {
// PPC's long double has odd notions of endianness compared to how LLVM
// handles it: p[0] goes first for *big* endian on PPC.
- if (AP.getDataLayout().isBigEndian() && !CFP->getType()->isPPC_FP128Ty()) {
+ if (AP.getDataLayout().isBigEndian() && !ET->isPPC_FP128Ty()) {
int Chunk = API.getNumWords() - 1;
if (TrailingBytes)
@@ -2260,8 +2388,11 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) {
// Emit the tail padding for the long double.
const DataLayout &DL = AP.getDataLayout();
- AP.OutStreamer->EmitZeros(DL.getTypeAllocSize(CFP->getType()) -
- DL.getTypeStoreSize(CFP->getType()));
+ AP.OutStreamer->EmitZeros(DL.getTypeAllocSize(ET) - DL.getTypeStoreSize(ET));
+}
+
+static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) {
+ emitGlobalConstantFP(CFP->getValueAPF(), CFP->getType(), AP);
}
static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
@@ -2320,7 +2451,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
}
}
-/// \brief Transform a not absolute MCExpr containing a reference to a GOT
+/// Transform a not absolute MCExpr containing a reference to a GOT
/// equivalent global, by a target specific GOT pc relative access to the
/// final symbol.
static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME,
@@ -2533,6 +2664,25 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const {
/// GetCPISymbol - Return the symbol for the specified constant pool entry.
MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const {
+ if (getSubtargetInfo().getTargetTriple().isKnownWindowsMSVCEnvironment()) {
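+    // If the constant was placed in a COMDAT constant section, return that
+    // section's COMDAT symbol, promoting it to a global symbol if it is
+    // still undefined.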
+ const MachineConstantPoolEntry &CPE =
+ MF->getConstantPool()->getConstants()[CPID];
+ if (!CPE.isMachineConstantPoolEntry()) {
+ const DataLayout &DL = MF->getDataLayout();
+ SectionKind Kind = CPE.getSectionKind(&DL);
+ const Constant *C = CPE.Val.ConstVal;
+ unsigned Align = CPE.Alignment;
+ if (const MCSectionCOFF *S = dyn_cast<MCSectionCOFF>(
+ getObjFileLowering().getSectionForConstant(DL, Kind, C, Align))) {
+ if (MCSymbol *Sym = S->getCOMDATSymbol()) {
+ if (Sym->isUndefined())
+ OutStreamer->EmitSymbolAttribute(Sym, MCSA_Global);
+ return Sym;
+ }
+ }
+ }
+ }
+
const DataLayout &DL = getDataLayout();
return OutContext.getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) +
"CPI" + Twine(getFunctionNumber()) + "_" +
@@ -2631,13 +2781,9 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB,
void AsmPrinter::setupCodePaddingContext(const MachineBasicBlock &MBB,
MCCodePaddingContext &Context) const {
assert(MF != nullptr && "Machine function must be valid");
- assert(LI != nullptr && "Loop info must be valid");
Context.IsPaddingActive = !MF->hasInlineAsm() &&
!MF->getFunction().optForSize() &&
TM.getOptLevel() != CodeGenOpt::None;
- const MachineLoop *CurrentLoop = LI->getLoopFor(&MBB);
- Context.IsBasicBlockInsideInnermostLoop =
- CurrentLoop != nullptr && CurrentLoop->getSubLoops().empty();
Context.IsBasicBlockReachableViaFallthrough =
std::find(MBB.pred_begin(), MBB.pred_end(), MBB.getPrevNode()) !=
MBB.pred_end();
@@ -2689,7 +2835,9 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
OutStreamer->GetCommentOS() << '\n';
}
}
- emitBasicBlockLoopComments(MBB, LI, *this);
+
+  assert(MLI != nullptr && "MachineLoopInfo should have been computed");
+ emitBasicBlockLoopComments(MBB, MLI, *this);
}
// Print the main label for the block.
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 08eb14e242c5..605588470670 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -17,7 +17,6 @@
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/DIE.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/TargetLoweringObjectFile.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -26,6 +25,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -43,15 +43,6 @@ void AsmPrinter::EmitSLEB128(int64_t Value, const char *Desc) const {
OutStreamer->EmitSLEB128IntValue(Value);
}
-/// EmitULEB128 - emit the specified unsigned leb128 value.
-void AsmPrinter::EmitPaddedULEB128(uint64_t Value, unsigned PadTo,
- const char *Desc) const {
- if (isVerbose() && Desc)
- OutStreamer->AddComment(Desc);
-
- OutStreamer->EmitPaddedULEB128IntValue(Value, PadTo);
-}
-
void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc) const {
if (isVerbose() && Desc)
OutStreamer->AddComment(Desc);
@@ -59,6 +50,12 @@ void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc) const {
OutStreamer->EmitULEB128IntValue(Value);
}
+/// Emit something like ".uleb128 Hi-Lo".
+void AsmPrinter::EmitLabelDifferenceAsULEB128(const MCSymbol *Hi,
+ const MCSymbol *Lo) const {
+ OutStreamer->emitAbsoluteSymbolDiffAsULEB128(Hi, Lo);
+}
+
static const char *DecodeDWARFEncoding(unsigned Encoding) {
switch (Encoding) {
case dwarf::DW_EH_PE_absptr:
@@ -67,6 +64,10 @@ static const char *DecodeDWARFEncoding(unsigned Encoding) {
return "omit";
case dwarf::DW_EH_PE_pcrel:
return "pcrel";
+ case dwarf::DW_EH_PE_uleb128:
+ return "uleb128";
+ case dwarf::DW_EH_PE_sleb128:
+ return "sleb128";
case dwarf::DW_EH_PE_udata4:
return "udata4";
case dwarf::DW_EH_PE_udata8:
@@ -167,14 +168,19 @@ void AsmPrinter::emitDwarfSymbolReference(const MCSymbol *Label,
EmitLabelDifference(Label, Label->getSection().getBeginSymbol(), 4);
}
-void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntryRef S) const {
+void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntry S) const {
if (MAI->doesDwarfUseRelocationsAcrossSections()) {
- emitDwarfSymbolReference(S.getSymbol());
+ assert(S.Symbol && "No symbol available");
+ emitDwarfSymbolReference(S.Symbol);
return;
}
// Just emit the offset directly; no need for symbol math.
- EmitInt32(S.getOffset());
+ emitInt32(S.Offset);
+}
+
+void AsmPrinter::EmitDwarfOffset(const MCSymbol *Label, uint64_t Offset) const {
+ EmitLabelPlusOffset(Label, Offset, MAI->getCodePointerSize());
}
//===----------------------------------------------------------------------===//
@@ -252,7 +258,7 @@ void AsmPrinter::emitDwarfDIE(const DIE &Die) const {
emitDwarfDIE(Child);
OutStreamer->AddComment("End Of Children Mark");
- EmitInt8(0);
+ emitInt8(0);
}
}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
index 638226e90a7a..f5ac95a20b10 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
@@ -27,29 +27,29 @@ class MCSymbol;
typedef MCSymbol *ExceptionSymbolProvider(AsmPrinter *Asm);
-/// \brief Collects and handles AsmPrinter objects required to build debug
+/// Collects and handles AsmPrinter objects required to build debug
/// or EH information.
class AsmPrinterHandler {
public:
virtual ~AsmPrinterHandler();
- /// \brief For symbols that have a size designated (e.g. common symbols),
+ /// For symbols that have a size designated (e.g. common symbols),
/// this tracks that size.
virtual void setSymbolSize(const MCSymbol *Sym, uint64_t Size) = 0;
- /// \brief Emit all sections that should come after the content.
+ /// Emit all sections that should come after the content.
virtual void endModule() = 0;
- /// \brief Gather pre-function debug information.
+ /// Gather pre-function debug information.
/// Every beginFunction(MF) call should be followed by an endFunction(MF)
/// call.
virtual void beginFunction(const MachineFunction *MF) = 0;
- // \brief Emit any of function marker (like .cfi_endproc). This is called
+  // Emit any end-of-function markers (like .cfi_endproc). This is called
// before endFunction and cannot switch sections.
virtual void markFunctionEnd();
- /// \brief Gather post-function debug information.
+ /// Gather post-function debug information.
/// Please note that some AsmPrinter implementations may not call
/// beginFunction at all.
virtual void endFunction(const MachineFunction *MF) = 0;
@@ -58,15 +58,15 @@ public:
ExceptionSymbolProvider ESP) {}
virtual void endFragment() {}
- /// \brief Emit target-specific EH funclet machinery.
+ /// Emit target-specific EH funclet machinery.
virtual void beginFunclet(const MachineBasicBlock &MBB,
MCSymbol *Sym = nullptr) {}
virtual void endFunclet() {}
- /// \brief Process beginning of an instruction.
+ /// Process beginning of an instruction.
virtual void beginInstruction(const MachineInstr *MI) = 0;
- /// \brief Process end of an instruction.
+ /// Process end of an instruction.
virtual void endInstruction() = 0;
};
} // End of namespace llvm
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 04a72ba3d738..4159eb19423a 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -132,6 +132,9 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
std::unique_ptr<MCAsmParser> Parser(
createMCAsmParser(SrcMgr, OutContext, *OutStreamer, *MAI, BufNum));
+ // Do not use assembler-level information for parsing inline assembly.
+ OutStreamer->setUseAssemblerInfoForParsing(false);
+
// We create a new MCInstrInfo here since we might be at the module level
// and not have a MachineFunction to initialize the TargetInstrInfo from and
// we only need MCInstrInfo for asm parsing. We create one unconditionally
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h b/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
index aaf6180c9404..2163cc7e3e11 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
@@ -43,7 +43,7 @@ public:
APByteStreamer(AsmPrinter &Asm) : AP(Asm) {}
void EmitInt8(uint8_t Byte, const Twine &Comment) override {
AP.OutStreamer->AddComment(Comment);
- AP.EmitInt8(Byte);
+ AP.emitInt8(Byte);
}
void EmitSLEB128(uint64_t DWord, const Twine &Comment) override {
AP.OutStreamer->AddComment(Comment);
@@ -76,7 +76,7 @@ private:
SmallVectorImpl<char> &Buffer;
SmallVectorImpl<std::string> &Comments;
- /// \brief Only verbose textual output needs comments. This will be set to
+ /// Only verbose textual output needs comments. This will be set to
/// true for that case, and false otherwise. If false, comments passed in to
/// the emit methods will be ignored.
bool GenerateComments;
@@ -93,15 +93,27 @@ public:
}
void EmitSLEB128(uint64_t DWord, const Twine &Comment) override {
raw_svector_ostream OSE(Buffer);
- encodeSLEB128(DWord, OSE);
- if (GenerateComments)
+ unsigned Length = encodeSLEB128(DWord, OSE);
+ if (GenerateComments) {
Comments.push_back(Comment.str());
+ // Add some empty comments to keep the Buffer and Comments vectors aligned
+ // with each other.
+ for (size_t i = 1; i < Length; ++i)
+ Comments.push_back("");
+
+ }
}
void EmitULEB128(uint64_t DWord, const Twine &Comment) override {
raw_svector_ostream OSE(Buffer);
- encodeULEB128(DWord, OSE);
- if (GenerateComments)
+ unsigned Length = encodeULEB128(DWord, OSE);
+ if (GenerateComments) {
Comments.push_back(Comment.str());
+ // Add some empty comments to keep the Buffer and Comments vectors aligned
+ // with each other.
+ for (size_t i = 1; i < Length; ++i)
+ Comments.push_back("");
+
+ }
}
};
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 1d0a003dc50a..8c5c5478d01a 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -36,7 +36,6 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
-#include "llvm/CodeGen/TargetLoweringObjectFile.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
@@ -75,6 +74,7 @@
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
@@ -114,6 +114,16 @@ StringRef CodeViewDebug::getFullFilepath(const DIFile *File) {
StringRef Dir = File->getDirectory(), Filename = File->getFilename();
+ // If this is a Unix-style path, just use it as is. Don't try to canonicalize
+ // it textually because one of the path components could be a symlink.
+ if (!Dir.empty() && Dir[0] == '/') {
+ Filepath = Dir;
+ if (Dir.back() != '/')
+ Filepath += '/';
+ Filepath += Filename;
+ return Filepath;
+ }
+
// Clang emits directory and relative filename info into the IR, but CodeView
// operates on full paths. We could change Clang to emit full paths too, but
// that would increase the IR size and probably not needed for other users.
@@ -165,14 +175,21 @@ unsigned CodeViewDebug::maybeRecordFile(const DIFile *F) {
auto Insertion = FileIdMap.insert(std::make_pair(FullPath, NextId));
if (Insertion.second) {
// We have to compute the full filepath and emit a .cv_file directive.
- std::string Checksum = fromHex(F->getChecksum());
- void *CKMem = OS.getContext().allocate(Checksum.size(), 1);
- memcpy(CKMem, Checksum.data(), Checksum.size());
- ArrayRef<uint8_t> ChecksumAsBytes(reinterpret_cast<const uint8_t *>(CKMem),
- Checksum.size());
- DIFile::ChecksumKind ChecksumKind = F->getChecksumKind();
+ ArrayRef<uint8_t> ChecksumAsBytes;
+ FileChecksumKind CSKind = FileChecksumKind::None;
+ if (F->getChecksum()) {
+ std::string Checksum = fromHex(F->getChecksum()->Value);
+ void *CKMem = OS.getContext().allocate(Checksum.size(), 1);
+ memcpy(CKMem, Checksum.data(), Checksum.size());
+ ChecksumAsBytes = ArrayRef<uint8_t>(
+ reinterpret_cast<const uint8_t *>(CKMem), Checksum.size());
+ switch (F->getChecksum()->Kind) {
+ case DIFile::CSK_MD5: CSKind = FileChecksumKind::MD5; break;
+ case DIFile::CSK_SHA1: CSKind = FileChecksumKind::SHA1; break;
+ }
+ }
bool Success = OS.EmitCVFileDirective(NextId, FullPath, ChecksumAsBytes,
- static_cast<unsigned>(ChecksumKind));
+ static_cast<unsigned>(CSKind));
(void)Success;
assert(Success && ".cv_file directive failed");
}
@@ -358,15 +375,15 @@ unsigned CodeViewDebug::getPointerSizeInBytes() {
}
void CodeViewDebug::recordLocalVariable(LocalVariable &&Var,
- const DILocation *InlinedAt) {
- if (InlinedAt) {
+ const LexicalScope *LS) {
+ if (const DILocation *InlinedAt = LS->getInlinedAt()) {
// This variable was inlined. Associate it with the InlineSite.
const DISubprogram *Inlinee = Var.DIVar->getScope()->getSubprogram();
InlineSite &Site = getInlineSite(InlinedAt, Inlinee);
Site.InlinedLocals.emplace_back(Var);
} else {
- // This variable goes in the main ProcSym.
- CurFn->Locals.emplace_back(Var);
+ // This variable goes into the corresponding lexical scope.
+ ScopeVariables[LS].emplace_back(Var);
}
}
@@ -463,7 +480,7 @@ void CodeViewDebug::endModule() {
// Emit per-function debug information.
for (auto &P : FnDebugInfo)
if (!P.first->isDeclarationForLinker())
- emitDebugInfoForFunction(P.first, P.second);
+ emitDebugInfoForFunction(P.first, *P.second);
// Emit global variable debug information.
setCurrentSubprogram(nullptr);
@@ -501,12 +518,12 @@ void CodeViewDebug::endModule() {
clear();
}
-static void emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S) {
+static void emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S,
+ unsigned MaxFixedRecordLength = 0xF00) {
// The maximum CV record length is 0xFF00. Most of the strings we emit appear
// after a fixed length portion of the record. The fixed length portion should
// always be less than 0xF00 (3840) bytes, so truncate the string so that the
// overall record size is less than the maximum allowed.
- unsigned MaxFixedRecordLength = 0xF00;
SmallString<32> NullTerminatedString(
S.take_front(MaxRecordLength - MaxFixedRecordLength - 1));
NullTerminatedString.push_back('\0');
@@ -517,7 +534,7 @@ void CodeViewDebug::emitTypeInformation() {
if (TypeTable.empty())
return;
- // Start the .debug$T section with 0x4.
+ // Start the .debug$T or .debug$P section with 0x4.
OS.SwitchSection(Asm->getObjFileLowering().getCOFFDebugTypesSection());
emitCodeViewMagicVersion();
@@ -572,7 +589,7 @@ void CodeViewDebug::emitTypeGlobalHashes() {
OS.AddComment("Section Version");
OS.EmitIntValue(0, 2);
OS.AddComment("Hash Algorithm");
- OS.EmitIntValue(uint16_t(GlobalTypeHashAlg::SHA1), 2);
+ OS.EmitIntValue(uint16_t(GlobalTypeHashAlg::SHA1_8), 2);
TypeIndex TI(TypeIndex::FirstNonSimpleIndex);
for (const auto &GHR : TypeTable.hashes()) {
@@ -585,7 +602,7 @@ void CodeViewDebug::emitTypeGlobalHashes() {
OS.AddComment(Comment);
++TI;
}
- assert(GHR.Hash.size() % 20 == 0);
+ assert(GHR.Hash.size() == 8);
StringRef S(reinterpret_cast<const char *>(GHR.Hash.data()),
GHR.Hash.size());
OS.EmitBinaryData(S);
@@ -821,10 +838,61 @@ void CodeViewDebug::switchToDebugSectionForSymbol(const MCSymbol *GVSym) {
emitCodeViewMagicVersion();
}
+// Emit an S_THUNK32/S_END symbol pair for a thunk routine.
+// The only supported thunk ordinal is currently the standard type.
+void CodeViewDebug::emitDebugInfoForThunk(const Function *GV,
+ FunctionInfo &FI,
+ const MCSymbol *Fn) {
+ std::string FuncName = GlobalValue::dropLLVMManglingEscape(GV->getName());
+ const ThunkOrdinal ordinal = ThunkOrdinal::Standard; // Only supported kind.
+
+ OS.AddComment("Symbol subsection for " + Twine(FuncName));
+ MCSymbol *SymbolsEnd = beginCVSubsection(DebugSubsectionKind::Symbols);
+
+ // Emit S_THUNK32
+ MCSymbol *ThunkRecordBegin = MMI->getContext().createTempSymbol(),
+ *ThunkRecordEnd = MMI->getContext().createTempSymbol();
+ OS.AddComment("Record length");
+ OS.emitAbsoluteSymbolDiff(ThunkRecordEnd, ThunkRecordBegin, 2);
+ OS.EmitLabel(ThunkRecordBegin);
+ OS.AddComment("Record kind: S_THUNK32");
+ OS.EmitIntValue(unsigned(SymbolKind::S_THUNK32), 2);
+ OS.AddComment("PtrParent");
+ OS.EmitIntValue(0, 4);
+ OS.AddComment("PtrEnd");
+ OS.EmitIntValue(0, 4);
+ OS.AddComment("PtrNext");
+ OS.EmitIntValue(0, 4);
+ OS.AddComment("Thunk section relative address");
+ OS.EmitCOFFSecRel32(Fn, /*Offset=*/0);
+ OS.AddComment("Thunk section index");
+ OS.EmitCOFFSectionIndex(Fn);
+ OS.AddComment("Code size");
+ OS.emitAbsoluteSymbolDiff(FI.End, Fn, 2);
+ OS.AddComment("Ordinal");
+ OS.EmitIntValue(unsigned(ordinal), 1);
+ OS.AddComment("Function name");
+ emitNullTerminatedSymbolName(OS, FuncName);
+ // Additional fields specific to the thunk ordinal would go here.
+ OS.EmitLabel(ThunkRecordEnd);
+
+ // Local variables/inlined routines are purposely omitted here. The point of
+ // marking this as a thunk is so Visual Studio will NOT stop in this routine.
+
+ // Emit S_PROC_ID_END
+ const unsigned RecordLengthForSymbolEnd = 2;
+ OS.AddComment("Record length");
+ OS.EmitIntValue(RecordLengthForSymbolEnd, 2);
+ OS.AddComment("Record kind: S_PROC_ID_END");
+ OS.EmitIntValue(unsigned(SymbolKind::S_PROC_ID_END), 2);
+
+ endCVSubsection(SymbolsEnd);
+}
+
void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
FunctionInfo &FI) {
- // For each function there is a separate subsection
- // which holds the PC to file:line table.
+ // For each function there is a separate subsection which holds the PC to
+ // file:line table.
const MCSymbol *Fn = Asm->getSymbol(GV);
assert(Fn);
@@ -836,6 +904,11 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
assert(SP);
setCurrentSubprogram(SP);
+ if (SP->isThunk()) {
+ emitDebugInfoForThunk(GV, FI, Fn);
+ return;
+ }
+
// If we have a display name, build the fully qualified name by walking the
// chain of scopes.
if (!SP->getName().empty())
@@ -898,6 +971,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
OS.EmitLabel(ProcRecordEnd);
emitLocalVariableList(FI.Locals);
+ emitLexicalBlockList(FI.ChildBlocks, FI);
// Emit inlined call site information. Only emit functions inlined directly
// into the parent function. We'll emit the other sites recursively as part
@@ -1018,7 +1092,7 @@ void CodeViewDebug::collectVariableInfoFromMFTable(
LocalVariable Var;
Var.DIVar = VI.Var;
Var.DefRanges.emplace_back(std::move(DefRange));
- recordLocalVariable(std::move(Var), VI.Loc->getInlinedAt());
+ recordLocalVariable(std::move(Var), Scope);
}
}
@@ -1100,7 +1174,7 @@ void CodeViewDebug::calculateRanges(
auto J = std::next(I);
const DIExpression *DIExpr = DVInst->getDebugExpression();
while (J != E &&
- !fragmentsOverlap(DIExpr, J->first->getDebugExpression()))
+ !DIExpr->fragmentsOverlap(J->first->getDebugExpression()))
++J;
if (J != E)
End = getLabelBeforeInsn(J->first);
@@ -1149,14 +1223,15 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
Var.DIVar = DIVar;
calculateRanges(Var, Ranges);
- recordLocalVariable(std::move(Var), InlinedAt);
+ recordLocalVariable(std::move(Var), Scope);
}
}
void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) {
const Function &GV = MF->getFunction();
- assert(FnDebugInfo.count(&GV) == false);
- CurFn = &FnDebugInfo[&GV];
+ auto Insertion = FnDebugInfo.insert({&GV, llvm::make_unique<FunctionInfo>()});
+ assert(Insertion.second && "function already has info");
+ CurFn = Insertion.first->second.get();
CurFn->FuncId = NextFuncId++;
CurFn->Begin = Asm->getFunctionBegin();
@@ -1261,6 +1336,7 @@ TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) {
return lowerTypePointer(cast<DIDerivedType>(Ty));
case dwarf::DW_TAG_ptr_to_member_type:
return lowerTypeMemberPointer(cast<DIDerivedType>(Ty));
+ case dwarf::DW_TAG_restrict_type:
case dwarf::DW_TAG_const_type:
case dwarf::DW_TAG_volatile_type:
// TODO: add support for DW_TAG_atomic_type here
@@ -1281,6 +1357,8 @@ TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) {
return lowerTypeClass(cast<DICompositeType>(Ty));
case dwarf::DW_TAG_union_type:
return lowerTypeUnion(cast<DICompositeType>(Ty));
+ case dwarf::DW_TAG_unspecified_type:
+ return TypeIndex::None();
default:
// Use the null type index.
return TypeIndex();
@@ -1308,7 +1386,7 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
DITypeRef ElementTypeRef = Ty->getBaseType();
TypeIndex ElementTypeIndex = getTypeIndex(ElementTypeRef);
// IndexType is size_t, which depends on the bitness of the target.
- TypeIndex IndexType = Asm->TM.getPointerSize() == 8
+ TypeIndex IndexType = getPointerSizeInBytes() == 8
? TypeIndex(SimpleTypeKind::UInt64Quad)
: TypeIndex(SimpleTypeKind::UInt32Long);
@@ -1323,7 +1401,9 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
const DISubrange *Subrange = cast<DISubrange>(Element);
assert(Subrange->getLowerBound() == 0 &&
"codeview doesn't support subranges with lower bounds");
- int64_t Count = Subrange->getCount();
+ int64_t Count = -1;
+ if (auto *CI = Subrange->getCount().dyn_cast<ConstantInt*>())
+ Count = CI->getSExtValue();
// Forward declarations of arrays without a size and VLAs use a count of -1.
// Emit a count of zero in these cases to match what MSVC does for arrays
@@ -1441,12 +1521,13 @@ TypeIndex CodeViewDebug::lowerTypeBasic(const DIBasicType *Ty) {
return TypeIndex(STK);
}
-TypeIndex CodeViewDebug::lowerTypePointer(const DIDerivedType *Ty) {
+TypeIndex CodeViewDebug::lowerTypePointer(const DIDerivedType *Ty,
+ PointerOptions PO) {
TypeIndex PointeeTI = getTypeIndex(Ty->getBaseType());
- // Pointers to simple types can use SimpleTypeMode, rather than having a
- // dedicated pointer type record.
- if (PointeeTI.isSimple() &&
+ // Pointers to simple types without any options can use SimpleTypeMode, rather
+ // than having a dedicated pointer type record.
+ if (PointeeTI.isSimple() && PO == PointerOptions::None &&
PointeeTI.getSimpleMode() == SimpleTypeMode::Direct &&
Ty->getTag() == dwarf::DW_TAG_pointer_type) {
SimpleTypeMode Mode = Ty->getSizeInBits() == 64
@@ -1470,10 +1551,7 @@ TypeIndex CodeViewDebug::lowerTypePointer(const DIDerivedType *Ty) {
PM = PointerMode::RValueReference;
break;
}
- // FIXME: MSVC folds qualifiers into PointerOptions in the context of a method
- // 'this' pointer, but not normal contexts. Figure out what we're supposed to
- // do.
- PointerOptions PO = PointerOptions::None;
+
PointerRecord PR(PointeeTI, PK, PM, PO, Ty->getSizeInBits() / 8);
return TypeTable.writeLeafType(PR);
}
@@ -1511,16 +1589,17 @@ translatePtrToMemberRep(unsigned SizeInBytes, bool IsPMF, unsigned Flags) {
llvm_unreachable("invalid ptr to member representation");
}
-TypeIndex CodeViewDebug::lowerTypeMemberPointer(const DIDerivedType *Ty) {
+TypeIndex CodeViewDebug::lowerTypeMemberPointer(const DIDerivedType *Ty,
+ PointerOptions PO) {
assert(Ty->getTag() == dwarf::DW_TAG_ptr_to_member_type);
TypeIndex ClassTI = getTypeIndex(Ty->getClassType());
TypeIndex PointeeTI = getTypeIndex(Ty->getBaseType(), Ty->getClassType());
- PointerKind PK = Asm->TM.getPointerSize() == 8 ? PointerKind::Near64
- : PointerKind::Near32;
+ PointerKind PK = getPointerSizeInBytes() == 8 ? PointerKind::Near64
+ : PointerKind::Near32;
bool IsPMF = isa<DISubroutineType>(Ty->getBaseType());
PointerMode PM = IsPMF ? PointerMode::PointerToMemberFunction
: PointerMode::PointerToDataMember;
- PointerOptions PO = PointerOptions::None; // FIXME
+
assert(Ty->getSizeInBits() / 8 <= 0xff && "pointer size too big");
uint8_t SizeInBytes = Ty->getSizeInBits() / 8;
MemberPointerInfo MPI(
@@ -1545,6 +1624,7 @@ static CallingConvention dwarfCCToCodeView(unsigned DwarfCC) {
TypeIndex CodeViewDebug::lowerTypeModifier(const DIDerivedType *Ty) {
ModifierOptions Mods = ModifierOptions::None;
+ PointerOptions PO = PointerOptions::None;
bool IsModifier = true;
const DIType *BaseTy = Ty;
while (IsModifier && BaseTy) {
@@ -1552,9 +1632,16 @@ TypeIndex CodeViewDebug::lowerTypeModifier(const DIDerivedType *Ty) {
switch (BaseTy->getTag()) {
case dwarf::DW_TAG_const_type:
Mods |= ModifierOptions::Const;
+ PO |= PointerOptions::Const;
break;
case dwarf::DW_TAG_volatile_type:
Mods |= ModifierOptions::Volatile;
+ PO |= PointerOptions::Volatile;
+ break;
+ case dwarf::DW_TAG_restrict_type:
+      // Only pointer types can be marked with __restrict. There is no known flag
+ // for __restrict in LF_MODIFIER records.
+ PO |= PointerOptions::Restrict;
break;
default:
IsModifier = false;
@@ -1563,7 +1650,31 @@ TypeIndex CodeViewDebug::lowerTypeModifier(const DIDerivedType *Ty) {
if (IsModifier)
BaseTy = cast<DIDerivedType>(BaseTy)->getBaseType().resolve();
}
+
+ // Check if the inner type will use an LF_POINTER record. If so, the
+ // qualifiers will go in the LF_POINTER record. This comes up for types like
+ // 'int *const' and 'int *__restrict', not the more common cases like 'const
+ // char *'.
+ if (BaseTy) {
+ switch (BaseTy->getTag()) {
+ case dwarf::DW_TAG_pointer_type:
+ case dwarf::DW_TAG_reference_type:
+ case dwarf::DW_TAG_rvalue_reference_type:
+ return lowerTypePointer(cast<DIDerivedType>(BaseTy), PO);
+ case dwarf::DW_TAG_ptr_to_member_type:
+ return lowerTypeMemberPointer(cast<DIDerivedType>(BaseTy), PO);
+ default:
+ break;
+ }
+ }
+
TypeIndex ModifiedTI = getTypeIndex(BaseTy);
+
+ // Return the base type index if there aren't any modifiers. For example, the
+ // metadata could contain restrict wrappers around non-pointer types.
+ if (Mods == ModifierOptions::None)
+ return ModifiedTI;
+
ModifierRecord MR(ModifiedTI, Mods);
return TypeTable.writeLeafType(MR);
}
@@ -1573,6 +1684,11 @@ TypeIndex CodeViewDebug::lowerTypeFunction(const DISubroutineType *Ty) {
for (DITypeRef ArgTypeRef : Ty->getTypeArray())
ReturnAndArgTypeIndices.push_back(getTypeIndex(ArgTypeRef));
+  // MSVC uses type none for the variadic argument.
+ if (ReturnAndArgTypeIndices.size() > 1 &&
+ ReturnAndArgTypeIndices.back() == TypeIndex::Void()) {
+ ReturnAndArgTypeIndices.back() = TypeIndex::None();
+ }
TypeIndex ReturnTypeIndex = TypeIndex::Void();
ArrayRef<TypeIndex> ArgTypeIndices = None;
if (!ReturnAndArgTypeIndices.empty()) {
@@ -1602,6 +1718,11 @@ TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty,
for (DITypeRef ArgTypeRef : Ty->getTypeArray())
ReturnAndArgTypeIndices.push_back(getTypeIndex(ArgTypeRef));
+  // MSVC uses type none for the variadic argument.
+ if (ReturnAndArgTypeIndices.size() > 1 &&
+ ReturnAndArgTypeIndices.back() == TypeIndex::Void()) {
+ ReturnAndArgTypeIndices.back() = TypeIndex::None();
+ }
TypeIndex ReturnTypeIndex = TypeIndex::Void();
ArrayRef<TypeIndex> ArgTypeIndices = None;
if (!ReturnAndArgTypeIndices.empty()) {
@@ -1716,6 +1837,26 @@ static ClassOptions getCommonClassOptions(const DICompositeType *Ty) {
return CO;
}
+void CodeViewDebug::addUDTSrcLine(const DIType *Ty, TypeIndex TI) {
+ switch (Ty->getTag()) {
+ case dwarf::DW_TAG_class_type:
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_union_type:
+ case dwarf::DW_TAG_enumeration_type:
+ break;
+ default:
+ return;
+ }
+
+ if (const auto *File = Ty->getFile()) {
+ StringIdRecord SIDR(TypeIndex(0x0), getFullFilepath(File));
+ TypeIndex SIDI = TypeTable.writeLeafType(SIDR);
+
+ UdtSourceLineRecord USLR(TI, SIDI, Ty->getLine());
+ TypeTable.writeLeafType(USLR);
+ }
+}
+
TypeIndex CodeViewDebug::lowerTypeEnum(const DICompositeType *Ty) {
ClassOptions CO = getCommonClassOptions(Ty);
TypeIndex FTI;
@@ -1744,7 +1885,11 @@ TypeIndex CodeViewDebug::lowerTypeEnum(const DICompositeType *Ty) {
EnumRecord ER(EnumeratorCount, CO, FTI, FullName, Ty->getIdentifier(),
getTypeIndex(Ty->getBaseType()));
- return TypeTable.writeLeafType(ER);
+ TypeIndex EnumTI = TypeTable.writeLeafType(ER);
+
+ addUDTSrcLine(Ty, EnumTI);
+
+ return EnumTI;
}
//===----------------------------------------------------------------------===//
@@ -1793,12 +1938,33 @@ void CodeViewDebug::collectMemberInfo(ClassInfo &Info,
Info.Members.push_back({DDTy, 0});
return;
}
- // An unnamed member must represent a nested struct or union. Add all the
- // indirect fields to the current record.
+
+ // An unnamed member may represent a nested struct or union. Attempt to
+ // interpret the unnamed member as a DICompositeType possibly wrapped in
+ // qualifier types. Add all the indirect fields to the current record if that
+ // succeeds, and drop the member if that fails.
assert((DDTy->getOffsetInBits() % 8) == 0 && "Unnamed bitfield member!");
uint64_t Offset = DDTy->getOffsetInBits();
const DIType *Ty = DDTy->getBaseType().resolve();
- const DICompositeType *DCTy = cast<DICompositeType>(Ty);
+ bool FullyResolved = false;
+ while (!FullyResolved) {
+ switch (Ty->getTag()) {
+ case dwarf::DW_TAG_const_type:
+ case dwarf::DW_TAG_volatile_type:
+ // FIXME: we should apply the qualifier types to the indirect fields
+ // rather than dropping them.
+ Ty = cast<DIDerivedType>(Ty)->getBaseType().resolve();
+ break;
+ default:
+ FullyResolved = true;
+ break;
+ }
+ }
+
+ const DICompositeType *DCTy = dyn_cast<DICompositeType>(Ty);
+ if (!DCTy)
+ return;
+
ClassInfo NestedInfo = collectClassInfo(DCTy);
for (const ClassInfo::MemberInfo &IndirectField : NestedInfo.Members)
Info.Members.push_back(
@@ -1838,7 +2004,28 @@ ClassInfo CodeViewDebug::collectClassInfo(const DICompositeType *Ty) {
return Info;
}
+static bool shouldAlwaysEmitCompleteClassType(const DICompositeType *Ty) {
+ // This routine is used by lowerTypeClass and lowerTypeUnion to determine
+ // if a complete type should be emitted instead of a forward reference.
+ return Ty->getName().empty() && Ty->getIdentifier().empty() &&
+ !Ty->isForwardDecl();
+}
+
TypeIndex CodeViewDebug::lowerTypeClass(const DICompositeType *Ty) {
+ // Emit the complete type for unnamed structs. C++ classes with methods
+ // which have a circular reference back to the class type are expected to
+ // be named by the front-end and should not be "unnamed". C unnamed
+ // structs should not have circular references.
+ if (shouldAlwaysEmitCompleteClassType(Ty)) {
+ // If this unnamed complete type is already in the process of being defined,
+ // then the description of the type is malformed and cannot be emitted
+ // into CodeView correctly, so report a fatal error.
+ auto I = CompleteTypeIndices.find(Ty);
+ if (I != CompleteTypeIndices.end() && I->second == TypeIndex())
+ report_fatal_error("cannot debug circular reference to unnamed type");
+ return getCompleteTypeIndex(Ty);
+ }
+
// First, construct the forward decl. Don't look into Ty to compute the
// forward decl options, since it might not be available in all TUs.
TypeRecordKind Kind = getRecordKind(Ty);
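A hypothetical source-level example (not taken from the patch) of the case handled above is a variable whose type is a truly unnamed struct. With no name and no unique identifier, a CodeView forward reference could never be matched back to its definition, so the complete record has to be emitted up front:

    // Hypothetical input exercising shouldAlwaysEmitCompleteClassType(): the
    // struct has an empty name and identifier and is not a forward decl, so
    // lowerTypeClass() goes straight to getCompleteTypeIndex().
    struct { int X; int Y; } UnnamedGlobal;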
@@ -1875,13 +2062,7 @@ TypeIndex CodeViewDebug::lowerCompleteTypeClass(const DICompositeType *Ty) {
SizeInBytes, FullName, Ty->getIdentifier());
TypeIndex ClassTI = TypeTable.writeLeafType(CR);
- if (const auto *File = Ty->getFile()) {
- StringIdRecord SIDR(TypeIndex(0x0), getFullFilepath(File));
- TypeIndex SIDI = TypeTable.writeLeafType(SIDR);
-
- UdtSourceLineRecord USLR(ClassTI, SIDI, Ty->getLine());
- TypeTable.writeLeafType(USLR);
- }
+ addUDTSrcLine(Ty, ClassTI);
addToUDTs(Ty);
@@ -1889,6 +2070,10 @@ TypeIndex CodeViewDebug::lowerCompleteTypeClass(const DICompositeType *Ty) {
}
TypeIndex CodeViewDebug::lowerTypeUnion(const DICompositeType *Ty) {
+ // Emit the complete type for unnamed unions.
+ if (shouldAlwaysEmitCompleteClassType(Ty))
+ return getCompleteTypeIndex(Ty);
+
ClassOptions CO =
ClassOptions::ForwardReference | getCommonClassOptions(Ty);
std::string FullName = getFullyQualifiedName(Ty);
@@ -1917,11 +2102,7 @@ TypeIndex CodeViewDebug::lowerCompleteTypeUnion(const DICompositeType *Ty) {
Ty->getIdentifier());
TypeIndex UnionTI = TypeTable.writeLeafType(UR);
- StringIdRecord SIR(TypeIndex(0x0), getFullFilepath(Ty->getFile()));
- TypeIndex SIRI = TypeTable.writeLeafType(SIR);
-
- UdtSourceLineRecord USLR(UnionTI, SIRI, Ty->getLine());
- TypeTable.writeLeafType(USLR);
+ addUDTSrcLine(Ty, UnionTI);
addToUDTs(Ty);
@@ -1943,8 +2124,7 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
for (const DIDerivedType *I : Info.Inheritance) {
if (I->getFlags() & DINode::FlagVirtual) {
// Virtual base.
- // FIXME: Emit VBPtrOffset when the frontend provides it.
- unsigned VBPtrOffset = 0;
+ unsigned VBPtrOffset = I->getVBPtrOffset();
// FIXME: Despite the accessor name, the offset is really in bytes.
unsigned VBTableIndex = I->getOffsetInBits() / 4;
auto RecordKind = (I->getFlags() & DINode::FlagIndirectVirtualBase) == DINode::FlagIndirectVirtualBase
@@ -1956,6 +2136,7 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
VBTableIndex);
ContinuationBuilder.writeMemberType(VBCR);
+ MemberCount++;
} else {
assert(I->getOffsetInBits() % 8 == 0 &&
"bases must be on byte boundaries");
@@ -1963,6 +2144,7 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
getTypeIndex(I->getBaseType()),
I->getOffsetInBits() / 8);
ContinuationBuilder.writeMemberType(BCR);
+ MemberCount++;
}
}
@@ -2121,9 +2303,7 @@ TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) {
return getTypeIndex(Ty);
}
- // Check if we've already translated the complete record type. Lowering a
- // complete type should never trigger lowering another complete type, so we
- // can reuse the hash table lookup result.
+ // Check if we've already translated the complete record type.
const auto *CTy = cast<DICompositeType>(Ty);
auto InsertResult = CompleteTypeIndices.insert({CTy, TypeIndex()});
if (!InsertResult.second)
@@ -2134,13 +2314,16 @@ TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) {
// Make sure the forward declaration is emitted first. It's unclear if this
// is necessary, but MSVC does it, and we should follow suit until we can show
// otherwise.
- TypeIndex FwdDeclTI = getTypeIndex(CTy);
+ // We only emit a forward declaration for named types.
+ if (!CTy->getName().empty() || !CTy->getIdentifier().empty()) {
+ TypeIndex FwdDeclTI = getTypeIndex(CTy);
- // Just use the forward decl if we don't have complete type info. This might
- // happen if the frontend is using modules and expects the complete definition
- // to be emitted elsewhere.
- if (CTy->isForwardDecl())
- return FwdDeclTI;
+ // Just use the forward decl if we don't have complete type info. This
+ // might happen if the frontend is using modules and expects the complete
+ // definition to be emitted elsewhere.
+ if (CTy->isForwardDecl())
+ return FwdDeclTI;
+ }
TypeIndex TI;
switch (CTy->getTag()) {
@@ -2155,7 +2338,11 @@ TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) {
llvm_unreachable("not a record");
}
- InsertResult.first->second = TI;
+ // Update the type index associated with this CompositeType. This cannot
+ // use the 'InsertResult' iterator above because it is potentially
+ // invalidated by map insertions which can occur while lowering the class
+ // type above.
+ CompleteTypeIndices[CTy] = TI;
return TI;
}
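The re-lookup above guards against a real hazard with DenseMap: insertions may rehash and invalidate every outstanding iterator. A compressed sketch of the pattern, assuming CompleteTypeIndices is a DenseMap as declared in the header:

    // Sketch: do not cache the iterator across code that can insert.
    auto InsertResult = CompleteTypeIndices.insert({CTy, TypeIndex()});
    // ... lowerCompleteTypeClass/Union may insert more entries and rehash ...
    CompleteTypeIndices[CTy] = TI; // fresh lookup instead of InsertResult.first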
@@ -2179,10 +2366,10 @@ void CodeViewDebug::emitLocalVariableList(ArrayRef<LocalVariable> Locals) {
for (const LocalVariable &L : Locals)
if (L.DIVar->isParameter())
Params.push_back(&L);
- std::sort(Params.begin(), Params.end(),
- [](const LocalVariable *L, const LocalVariable *R) {
- return L->DIVar->getArg() < R->DIVar->getArg();
- });
+ llvm::sort(Params.begin(), Params.end(),
+ [](const LocalVariable *L, const LocalVariable *R) {
+ return L->DIVar->getArg() < R->DIVar->getArg();
+ });
for (const LocalVariable *L : Params)
emitLocalVariable(*L);
@@ -2272,15 +2459,150 @@ void CodeViewDebug::emitLocalVariable(const LocalVariable &Var) {
}
}
+void CodeViewDebug::emitLexicalBlockList(ArrayRef<LexicalBlock *> Blocks,
+ const FunctionInfo& FI) {
+ for (LexicalBlock *Block : Blocks)
+ emitLexicalBlock(*Block, FI);
+}
+
+/// Emit an S_BLOCK32 and S_END record pair delimiting the contents of a
+/// lexical block scope.
+void CodeViewDebug::emitLexicalBlock(const LexicalBlock &Block,
+ const FunctionInfo& FI) {
+ MCSymbol *RecordBegin = MMI->getContext().createTempSymbol(),
+ *RecordEnd = MMI->getContext().createTempSymbol();
+
+ // Lexical block symbol record.
+ OS.AddComment("Record length");
+ OS.emitAbsoluteSymbolDiff(RecordEnd, RecordBegin, 2); // Record Length
+ OS.EmitLabel(RecordBegin);
+ OS.AddComment("Record kind: S_BLOCK32");
+ OS.EmitIntValue(SymbolKind::S_BLOCK32, 2); // Record Kind
+ OS.AddComment("PtrParent");
+ OS.EmitIntValue(0, 4); // PtrParent
+ OS.AddComment("PtrEnd");
+ OS.EmitIntValue(0, 4); // PtrEnd
+ OS.AddComment("Code size");
+ OS.emitAbsoluteSymbolDiff(Block.End, Block.Begin, 4); // Code Size
+ OS.AddComment("Function section relative address");
+ OS.EmitCOFFSecRel32(Block.Begin, /*Offset=*/0); // Func Offset
+ OS.AddComment("Function section index");
+ OS.EmitCOFFSectionIndex(FI.Begin); // Func Symbol
+ OS.AddComment("Lexical block name");
+ emitNullTerminatedSymbolName(OS, Block.Name); // Name
+ OS.EmitLabel(RecordEnd);
+
+ // Emit variables local to this lexical block.
+ emitLocalVariableList(Block.Locals);
+
+ // Emit lexical blocks contained within this block.
+ emitLexicalBlockList(Block.Children, FI);
+
+ // Close the lexical block scope.
+ OS.AddComment("Record length");
+ OS.EmitIntValue(2, 2); // Record Length
+ OS.AddComment("Record kind: S_END");
+ OS.EmitIntValue(SymbolKind::S_END, 2); // Record Kind
+}
+
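For reference, the byte layout produced by the emission sequence above, written out as a hypothetical mirror struct (field order and sizes follow the EmitIntValue/EmitCOFFSecRel32/EmitCOFFSectionIndex calls in emitLexicalBlock; this is a sketch, not an authoritative CodeView definition):

    struct BlockSym32Sketch {     // little-endian, as emitted above
      uint16_t RecordLen;         // RecordEnd - RecordBegin
      uint16_t RecordKind;        // SymbolKind::S_BLOCK32
      uint32_t PtrParent;         // emitted as 0 here
      uint32_t PtrEnd;            // emitted as 0 here
      uint32_t CodeSize;          // Block.End - Block.Begin
      uint32_t CodeOffset;        // section-relative address of Block.Begin
      uint16_t Segment;           // section index of the enclosing function
      // char Name[];             // null-terminated block name follows
    };                            // the scope is closed later by an S_END record

Local variables and nested blocks are emitted between the S_BLOCK32 record and its S_END, which is how the recursive emitLexicalBlockList call nests scopes.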
+/// Convenience routine for collecting lexical block information for a list
+/// of lexical scopes.
+void CodeViewDebug::collectLexicalBlockInfo(
+ SmallVectorImpl<LexicalScope *> &Scopes,
+ SmallVectorImpl<LexicalBlock *> &Blocks,
+ SmallVectorImpl<LocalVariable> &Locals) {
+ for (LexicalScope *Scope : Scopes)
+ collectLexicalBlockInfo(*Scope, Blocks, Locals);
+}
+
+/// Populate the lexical blocks and local variable lists of the parent with
+/// information about the specified lexical scope.
+void CodeViewDebug::collectLexicalBlockInfo(
+ LexicalScope &Scope,
+ SmallVectorImpl<LexicalBlock *> &ParentBlocks,
+ SmallVectorImpl<LocalVariable> &ParentLocals) {
+ if (Scope.isAbstractScope())
+ return;
+
+ auto LocalsIter = ScopeVariables.find(&Scope);
+ if (LocalsIter == ScopeVariables.end()) {
+ // This scope does not contain variables and can be eliminated.
+ collectLexicalBlockInfo(Scope.getChildren(), ParentBlocks, ParentLocals);
+ return;
+ }
+ SmallVectorImpl<LocalVariable> &Locals = LocalsIter->second;
+
+ const DILexicalBlock *DILB = dyn_cast<DILexicalBlock>(Scope.getScopeNode());
+ if (!DILB) {
+ // This scope is not a lexical block and can be eliminated, but keep any
+ // local variables it contains.
+ ParentLocals.append(Locals.begin(), Locals.end());
+ collectLexicalBlockInfo(Scope.getChildren(), ParentBlocks, ParentLocals);
+ return;
+ }
+
+ const SmallVectorImpl<InsnRange> &Ranges = Scope.getRanges();
+ if (Ranges.size() != 1 || !getLabelAfterInsn(Ranges.front().second)) {
+ // This lexical block scope has too many address ranges to represent in the
+ // current CodeView format or does not have a valid address range.
+ // Eliminate this lexical scope and promote any locals it contains to the
+ // parent scope.
+ //
+ // For lexical scopes with multiple address ranges you may be tempted to
+ // construct a single range covering every instruction where the block is
+ // live and everything in between. Unfortunately, Visual Studio only
+ // displays variables from the first matching lexical block scope. If the
+ // first lexical block contains exception handling code or cold code which
+ // is moved to the bottom of the routine creating a single range covering
+ // nearly the entire routine, then it will hide all other lexical blocks
+ // and the variables they contain.
+ //
+ ParentLocals.append(Locals.begin(), Locals.end());
+ collectLexicalBlockInfo(Scope.getChildren(), ParentBlocks, ParentLocals);
+ return;
+ }
+
+ // Create a new CodeView lexical block for this lexical scope. If we've
+ // seen this DILexicalBlock before then the scope tree is malformed and
+ // we can handle this gracefully by not processing it a second time.
+ auto BlockInsertion = CurFn->LexicalBlocks.insert({DILB, LexicalBlock()});
+ if (!BlockInsertion.second)
+ return;
+
+ // Create a lexical block containing the local variables and collect the
+ // lexical block information for the children.
+ const InsnRange &Range = Ranges.front();
+ assert(Range.first && Range.second);
+ LexicalBlock &Block = BlockInsertion.first->second;
+ Block.Begin = getLabelBeforeInsn(Range.first);
+ Block.End = getLabelAfterInsn(Range.second);
+ assert(Block.Begin && "missing label for scope begin");
+ assert(Block.End && "missing label for scope end");
+ Block.Name = DILB->getName();
+ Block.Locals = std::move(Locals);
+ ParentBlocks.push_back(&Block);
+ collectLexicalBlockInfo(Scope.getChildren(), Block.Children, Block.Locals);
+}
+
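As an illustration of the pruning rules above (hypothetical source, not from the patch): only scopes that are genuine DILexicalBlocks, contain at least one variable, and cover a single contiguous range become CodeView blocks; everything else is folded into its parent:

    extern void use(int);

    void f(int P) {
      int A = P;       // function scope -> ends up in FunctionInfo::Locals
      {
        int B = A + 1; // lexical block with a local and one address range
        use(B);        //   -> becomes a LexicalBlock with Locals = {B}
      }
      {
        use(A);        // block with no locals -> dropped, children promoted
      }
    }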
void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) {
const Function &GV = MF->getFunction();
assert(FnDebugInfo.count(&GV));
- assert(CurFn == &FnDebugInfo[&GV]);
+ assert(CurFn == FnDebugInfo[&GV].get());
collectVariableInfo(GV.getSubprogram());
+ // Build the lexical block structure to emit for this routine.
+ if (LexicalScope *CFS = LScopes.getCurrentFunctionScope())
+ collectLexicalBlockInfo(*CFS, CurFn->ChildBlocks, CurFn->Locals);
+
+ // Clear the scope and variable information from the map which will not be
+ // valid after we have finished processing this routine. This also prepares
+ // the map for the subsequent routine.
+ ScopeVariables.clear();
+
// Don't emit anything if we don't have any line tables.
- if (!CurFn->HaveLineInfo) {
+ // Thunks are compiler-generated and probably won't have source correlation.
+ if (!CurFn->HaveLineInfo && !GV.getSubprogram()->isThunk()) {
FnDebugInfo.erase(&GV);
CurFn = nullptr;
return;
@@ -2296,8 +2618,8 @@ void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) {
void CodeViewDebug::beginInstruction(const MachineInstr *MI) {
DebugHandlerBase::beginInstruction(MI);
- // Ignore DBG_VALUE locations and function prologue.
- if (!Asm || !CurFn || MI->isDebugValue() ||
+ // Ignore DBG_VALUE and DBG_LABEL locations and function prologue.
+ if (!Asm || !CurFn || MI->isDebugInstr() ||
MI->getFlag(MachineInstr::FrameSetup))
return;
@@ -2306,7 +2628,7 @@ void CodeViewDebug::beginInstruction(const MachineInstr *MI) {
DebugLoc DL = MI->getDebugLoc();
if (!DL && MI->getParent() != PrevInstBB) {
for (const auto &NextMI : *MI->getParent()) {
- if (NextMI.isDebugValue())
+ if (NextMI.isDebugInstr())
continue;
DL = NextMI.getDebugLoc();
if (DL)
@@ -2432,6 +2754,7 @@ void CodeViewDebug::emitDebugInfoForGlobal(const DIGlobalVariable *DIGV,
// FIXME: Thread local data, etc
MCSymbol *DataBegin = MMI->getContext().createTempSymbol(),
*DataEnd = MMI->getContext().createTempSymbol();
+ const unsigned FixedLengthOfThisRecord = 12;
OS.AddComment("Record length");
OS.emitAbsoluteSymbolDiff(DataEnd, DataBegin, 2);
OS.EmitLabel(DataBegin);
@@ -2459,6 +2782,6 @@ void CodeViewDebug::emitDebugInfoForGlobal(const DIGlobalVariable *DIGV,
OS.AddComment("Segment");
OS.EmitCOFFSectionIndex(GVSym);
OS.AddComment("Name");
- emitNullTerminatedSymbolName(OS, DIGV->getName());
+ emitNullTerminatedSymbolName(OS, DIGV->getName(), FixedLengthOfThisRecord);
OS.EmitLabel(DataEnd);
}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index 69e93640d7ef..6a0da5f993d0 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -48,7 +48,7 @@ class MCStreamer;
class MCSymbol;
class MachineFunction;
-/// \brief Collects and handles line tables information in a CodeView format.
+/// Collects and handles line tables information in a CodeView format.
class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
MCStreamer &OS;
BumpPtrAllocator Allocator;
@@ -107,9 +107,23 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
unsigned SiteFuncId = 0;
};
+ // Combines information from DILexicalBlock and LexicalScope.
+ struct LexicalBlock {
+ SmallVector<LocalVariable, 1> Locals;
+ SmallVector<LexicalBlock *, 1> Children;
+ const MCSymbol *Begin;
+ const MCSymbol *End;
+ StringRef Name;
+ };
+
// For each function, store a vector of labels to its instructions, as well as
// to the end of the function.
struct FunctionInfo {
+ FunctionInfo() = default;
+
+ // Uncopyable.
+ FunctionInfo(const FunctionInfo &FI) = delete;
+
/// Map from inlined call site to inlined instructions and child inlined
/// call sites. Listed in program order.
std::unordered_map<const DILocation *, InlineSite> InlineSites;
@@ -119,6 +133,11 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
SmallVector<LocalVariable, 1> Locals;
+ std::unordered_map<const DILexicalBlockBase*, LexicalBlock> LexicalBlocks;
+
+ // Lexical blocks containing local variables.
+ SmallVector<LexicalBlock *, 1> ChildBlocks;
+
std::vector<std::pair<MCSymbol *, MDNode *>> Annotations;
const MCSymbol *Begin = nullptr;
@@ -129,6 +148,12 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
};
FunctionInfo *CurFn = nullptr;
+ // Map used to separate variables according to the lexical scope they belong
+ // in. This is populated by recordLocalVariable() before
+ // collectLexicalBlockInfo() separates the variables between the FunctionInfo
+ // and LexicalBlocks.
+ DenseMap<const LexicalScope *, SmallVector<LocalVariable, 1>> ScopeVariables;
+
/// The set of comdat .debug$S sections that we've seen so far. Each section
/// must start with a magic version number that must only be emitted once.
/// This set tracks which sections we've already opened.
@@ -159,7 +184,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
/// Remember some debug info about each function. Keep it in a stable order to
/// emit at the end of the TU.
- MapVector<const Function *, FunctionInfo> FnDebugInfo;
+ MapVector<const Function *, std::unique_ptr<FunctionInfo>> FnDebugInfo;
/// Map from full file path to .cv_file id. Full paths are built from DIFiles
/// and are stored in FileToFilepathMap;
@@ -200,7 +225,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
using FileToFilepathMapTy = std::map<const DIFile *, std::string>;
FileToFilepathMapTy FileToFilepathMap;
- StringRef getFullFilepath(const DIFile *S);
+ StringRef getFullFilepath(const DIFile *File);
unsigned maybeRecordFile(const DIFile *F);
@@ -214,7 +239,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
}
/// Emit the magic version number at the start of a CodeView type or symbol
- /// section. Appears at the front of every .debug$S or .debug$T section.
+ /// section. Appears at the front of every .debug$S or .debug$T or .debug$P
+ /// section.
void emitCodeViewMagicVersion();
void emitTypeInformation();
@@ -225,6 +251,10 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
void emitInlineeLinesSubsection();
+ void emitDebugInfoForThunk(const Function *GV,
+ FunctionInfo &FI,
+ const MCSymbol *Fn);
+
void emitDebugInfoForFunction(const Function *GV, FunctionInfo &FI);
void emitDebugInfoForGlobals();
@@ -253,9 +283,18 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
void collectVariableInfoFromMFTable(DenseSet<InlinedVariable> &Processed);
+ // Construct the lexical block tree for a routine, pruning empty lexical
+ // scopes, and populate it with local variables.
+ void collectLexicalBlockInfo(SmallVectorImpl<LexicalScope *> &Scopes,
+ SmallVectorImpl<LexicalBlock *> &Blocks,
+ SmallVectorImpl<LocalVariable> &Locals);
+ void collectLexicalBlockInfo(LexicalScope &Scope,
+ SmallVectorImpl<LexicalBlock *> &ParentBlocks,
+ SmallVectorImpl<LocalVariable> &ParentLocals);
+
/// Records information about a local variable in the appropriate scope. In
/// particular, locals from inlined code live inside the inlining site.
- void recordLocalVariable(LocalVariable &&Var, const DILocation *Loc);
+ void recordLocalVariable(LocalVariable &&Var, const LexicalScope *LS);
/// Emits local variables in the appropriate order.
void emitLocalVariableList(ArrayRef<LocalVariable> Locals);
@@ -263,6 +302,13 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
/// Emits an S_LOCAL record and its associated defined ranges.
void emitLocalVariable(const LocalVariable &Var);
+ /// Emits a sequence of lexical block scopes and their children.
+ void emitLexicalBlockList(ArrayRef<LexicalBlock *> Blocks,
+ const FunctionInfo& FI);
+
+ /// Emit a lexical block scope and its children.
+ void emitLexicalBlock(const LexicalBlock &Block, const FunctionInfo& FI);
+
/// Translates the DIType to codeview if necessary and returns a type index
/// for it.
codeview::TypeIndex getTypeIndex(DITypeRef TypeRef,
@@ -279,12 +325,18 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
void addToUDTs(const DIType *Ty);
+ void addUDTSrcLine(const DIType *Ty, codeview::TypeIndex TI);
+
codeview::TypeIndex lowerType(const DIType *Ty, const DIType *ClassTy);
codeview::TypeIndex lowerTypeAlias(const DIDerivedType *Ty);
codeview::TypeIndex lowerTypeArray(const DICompositeType *Ty);
codeview::TypeIndex lowerTypeBasic(const DIBasicType *Ty);
- codeview::TypeIndex lowerTypePointer(const DIDerivedType *Ty);
- codeview::TypeIndex lowerTypeMemberPointer(const DIDerivedType *Ty);
+ codeview::TypeIndex lowerTypePointer(
+ const DIDerivedType *Ty,
+ codeview::PointerOptions PO = codeview::PointerOptions::None);
+ codeview::TypeIndex lowerTypeMemberPointer(
+ const DIDerivedType *Ty,
+ codeview::PointerOptions PO = codeview::PointerOptions::None);
codeview::TypeIndex lowerTypeModifier(const DIDerivedType *Ty);
codeview::TypeIndex lowerTypeFunction(const DISubroutineType *Ty);
codeview::TypeIndex lowerTypeVFTableShape(const DIDerivedType *Ty);
@@ -327,21 +379,21 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
unsigned getPointerSizeInBytes();
protected:
- /// \brief Gather pre-function debug information.
+ /// Gather pre-function debug information.
void beginFunctionImpl(const MachineFunction *MF) override;
- /// \brief Gather post-function debug information.
+ /// Gather post-function debug information.
void endFunctionImpl(const MachineFunction *) override;
public:
- CodeViewDebug(AsmPrinter *Asm);
+ CodeViewDebug(AsmPrinter *AP);
void setSymbolSize(const MCSymbol *, uint64_t) override {}
- /// \brief Emit the COFF section that holds the line table information.
+ /// Emit the COFF section that holds the line table information.
void endModule() override;
- /// \brief Process beginning of an instruction.
+ /// Process beginning of an instruction.
void beginInstruction(const MachineInstr *MI) override;
};
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
index b3148db30cd6..570424a79c81 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -17,6 +17,7 @@
#include "DwarfUnit.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@@ -86,8 +87,9 @@ void DIEAbbrev::Emit(const AsmPrinter *AP) const {
// easily, which helps track down where it came from.
if (!dwarf::isValidFormForVersion(AttrData.getForm(),
AP->getDwarfVersion())) {
- DEBUG(dbgs() << "Invalid form " << format("0x%x", AttrData.getForm())
- << " for DWARF version " << AP->getDwarfVersion() << "\n");
+ LLVM_DEBUG(dbgs() << "Invalid form " << format("0x%x", AttrData.getForm())
+ << " for DWARF version " << AP->getDwarfVersion()
+ << "\n");
llvm_unreachable("Invalid form for specified DWARF version");
}
#endif
@@ -388,6 +390,7 @@ void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
case dwarf::DW_FORM_data2:
case dwarf::DW_FORM_strx2:
case dwarf::DW_FORM_addrx2:
+ case dwarf::DW_FORM_strx3:
case dwarf::DW_FORM_strp:
case dwarf::DW_FORM_ref4:
case dwarf::DW_FORM_data4:
@@ -410,6 +413,7 @@ void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
case dwarf::DW_FORM_GNU_str_index:
case dwarf::DW_FORM_GNU_addr_index:
case dwarf::DW_FORM_ref_udata:
+ case dwarf::DW_FORM_strx:
case dwarf::DW_FORM_udata:
Asm->EmitULEB128(Integer);
return;
@@ -423,58 +427,23 @@ void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
/// SizeOf - Determine size of integer value in bytes.
///
unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
+ dwarf::FormParams Params = {0, 0, dwarf::DWARF32};
+ if (AP)
+ Params = {AP->getDwarfVersion(), uint8_t(AP->getPointerSize()),
+ AP->OutStreamer->getContext().getDwarfFormat()};
+
+ if (Optional<uint8_t> FixedSize = dwarf::getFixedFormByteSize(Form, Params))
+ return *FixedSize;
+
switch (Form) {
- case dwarf::DW_FORM_implicit_const:
- case dwarf::DW_FORM_flag_present:
- return 0;
- case dwarf::DW_FORM_flag:
- case dwarf::DW_FORM_ref1:
- case dwarf::DW_FORM_data1:
- case dwarf::DW_FORM_strx1:
- case dwarf::DW_FORM_addrx1:
- return sizeof(int8_t);
- case dwarf::DW_FORM_ref2:
- case dwarf::DW_FORM_data2:
- case dwarf::DW_FORM_strx2:
- case dwarf::DW_FORM_addrx2:
- return sizeof(int16_t);
- case dwarf::DW_FORM_ref4:
- case dwarf::DW_FORM_data4:
- case dwarf::DW_FORM_ref_sup4:
- case dwarf::DW_FORM_strx4:
- case dwarf::DW_FORM_addrx4:
- return sizeof(int32_t);
- case dwarf::DW_FORM_ref8:
- case dwarf::DW_FORM_ref_sig8:
- case dwarf::DW_FORM_data8:
- case dwarf::DW_FORM_ref_sup8:
- return sizeof(int64_t);
- case dwarf::DW_FORM_ref_addr:
- if (AP->getDwarfVersion() == 2)
- return AP->getPointerSize();
- LLVM_FALLTHROUGH;
- case dwarf::DW_FORM_strp:
- case dwarf::DW_FORM_GNU_ref_alt:
- case dwarf::DW_FORM_GNU_strp_alt:
- case dwarf::DW_FORM_line_strp:
- case dwarf::DW_FORM_sec_offset:
- case dwarf::DW_FORM_strp_sup:
- switch (AP->OutStreamer->getContext().getDwarfFormat()) {
- case dwarf::DWARF32:
- return 4;
- case dwarf::DWARF64:
- return 8;
- }
- llvm_unreachable("Invalid DWARF format");
case dwarf::DW_FORM_GNU_str_index:
case dwarf::DW_FORM_GNU_addr_index:
case dwarf::DW_FORM_ref_udata:
+ case dwarf::DW_FORM_strx:
case dwarf::DW_FORM_udata:
return getULEB128Size(Integer);
case dwarf::DW_FORM_sdata:
return getSLEB128Size(Integer);
- case dwarf::DW_FORM_addr:
- return AP->getPointerSize();
default: llvm_unreachable("DIE Value form not supported yet");
}
}
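A small usage sketch of the rewritten size computation (values assume DWARF32 and the FormParams initialization order used above; headers as already included by DIE.cpp):

    // Fixed-size forms are answered by dwarf::getFixedFormByteSize();
    // LEB128-encoded forms still depend on the value being emitted.
    dwarf::FormParams Params = {/*Version=*/4, /*AddrSize=*/8, dwarf::DWARF32};
    assert(*dwarf::getFixedFormByteSize(dwarf::DW_FORM_data4, Params) == 4);
    assert(*dwarf::getFixedFormByteSize(dwarf::DW_FORM_strp, Params) == 4); // 8 under DWARF64
    assert(!dwarf::getFixedFormByteSize(dwarf::DW_FORM_udata, Params));     // ULEB128-sized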
@@ -564,44 +533,46 @@ void DIEDelta::print(raw_ostream &O) const {
/// EmitValue - Emit string value.
///
void DIEString::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
- assert(
- (Form == dwarf::DW_FORM_strp || Form == dwarf::DW_FORM_GNU_str_index) &&
- "Expected valid string form");
-
// Index of string in symbol table.
- if (Form == dwarf::DW_FORM_GNU_str_index) {
+ switch (Form) {
+ case dwarf::DW_FORM_GNU_str_index:
+ case dwarf::DW_FORM_strx:
+ case dwarf::DW_FORM_strx1:
+ case dwarf::DW_FORM_strx2:
+ case dwarf::DW_FORM_strx3:
+ case dwarf::DW_FORM_strx4:
DIEInteger(S.getIndex()).EmitValue(AP, Form);
return;
- }
-
- // Relocatable symbol.
- assert(Form == dwarf::DW_FORM_strp);
- if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) {
- DIELabel(S.getSymbol()).EmitValue(AP, Form);
+ case dwarf::DW_FORM_strp:
+ if (AP->MAI->doesDwarfUseRelocationsAcrossSections())
+ DIELabel(S.getSymbol()).EmitValue(AP, Form);
+ else
+ DIEInteger(S.getOffset()).EmitValue(AP, Form);
return;
+ default:
+ llvm_unreachable("Expected valid string form");
}
-
- // Offset into symbol table.
- DIEInteger(S.getOffset()).EmitValue(AP, Form);
}
/// SizeOf - Determine size of delta value in bytes.
///
unsigned DIEString::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
- assert(
- (Form == dwarf::DW_FORM_strp || Form == dwarf::DW_FORM_GNU_str_index) &&
- "Expected valid string form");
-
// Index of string in symbol table.
- if (Form == dwarf::DW_FORM_GNU_str_index)
+ switch (Form) {
+ case dwarf::DW_FORM_GNU_str_index:
+ case dwarf::DW_FORM_strx:
+ case dwarf::DW_FORM_strx1:
+ case dwarf::DW_FORM_strx2:
+ case dwarf::DW_FORM_strx3:
+ case dwarf::DW_FORM_strx4:
return DIEInteger(S.getIndex()).SizeOf(AP, Form);
-
- // Relocatable symbol.
- if (AP->MAI->doesDwarfUseRelocationsAcrossSections())
- return DIELabel(S.getSymbol()).SizeOf(AP, Form);
-
- // Offset into symbol table.
- return DIEInteger(S.getOffset()).SizeOf(AP, Form);
+ case dwarf::DW_FORM_strp:
+ if (AP->MAI->doesDwarfUseRelocationsAcrossSections())
+ return DIELabel(S.getSymbol()).SizeOf(AP, Form);
+ return DIEInteger(S.getOffset()).SizeOf(AP, Form);
+ default:
+ llvm_unreachable("Expected valid string form");
+ }
}
LLVM_DUMP_METHOD
@@ -615,8 +586,8 @@ void DIEString::print(raw_ostream &O) const {
void DIEInlineString::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_string) {
for (char ch : S)
- AP->EmitInt8(ch);
- AP->EmitInt8(0);
+ AP->emitInt8(ch);
+ AP->emitInt8(0);
return;
}
llvm_unreachable("Expected valid string form");
@@ -722,9 +693,9 @@ unsigned DIELoc::ComputeSize(const AsmPrinter *AP) const {
void DIELoc::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
switch (Form) {
default: llvm_unreachable("Improper form for block");
- case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break;
- case dwarf::DW_FORM_block2: Asm->EmitInt16(Size); break;
- case dwarf::DW_FORM_block4: Asm->EmitInt32(Size); break;
+ case dwarf::DW_FORM_block1: Asm->emitInt8(Size); break;
+ case dwarf::DW_FORM_block2: Asm->emitInt16(Size); break;
+ case dwarf::DW_FORM_block4: Asm->emitInt32(Size); break;
case dwarf::DW_FORM_block:
case dwarf::DW_FORM_exprloc:
Asm->EmitULEB128(Size); break;
@@ -773,10 +744,11 @@ unsigned DIEBlock::ComputeSize(const AsmPrinter *AP) const {
void DIEBlock::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
switch (Form) {
default: llvm_unreachable("Improper form for block");
- case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break;
- case dwarf::DW_FORM_block2: Asm->EmitInt16(Size); break;
- case dwarf::DW_FORM_block4: Asm->EmitInt32(Size); break;
+ case dwarf::DW_FORM_block1: Asm->emitInt8(Size); break;
+ case dwarf::DW_FORM_block2: Asm->emitInt16(Size); break;
+ case dwarf::DW_FORM_block4: Asm->emitInt32(Size); break;
case dwarf::DW_FORM_block: Asm->EmitULEB128(Size); break;
+ case dwarf::DW_FORM_string: break;
case dwarf::DW_FORM_data16: break;
}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
index 15ade3c96dfe..b8f1202494d7 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -28,7 +28,7 @@ using namespace llvm;
#define DEBUG_TYPE "dwarfdebug"
-/// \brief Grabs the string in whichever attribute is passed in and returns
+/// Grabs the string in whichever attribute is passed in and returns
/// a reference to it.
static StringRef getDIEStringAttr(const DIE &Die, uint16_t Attr) {
// Iterate through all the attributes until we find the one we're
@@ -40,10 +40,10 @@ static StringRef getDIEStringAttr(const DIE &Die, uint16_t Attr) {
return StringRef("");
}
-/// \brief Adds the string in \p Str to the hash. This also hashes
+/// Adds the string in \p Str to the hash. This also hashes
/// a trailing NULL with the string.
void DIEHash::addString(StringRef Str) {
- DEBUG(dbgs() << "Adding string " << Str << " to hash.\n");
+ LLVM_DEBUG(dbgs() << "Adding string " << Str << " to hash.\n");
Hash.update(Str);
Hash.update(makeArrayRef((uint8_t)'\0'));
}
@@ -51,9 +51,9 @@ void DIEHash::addString(StringRef Str) {
// FIXME: The LEB128 routines are copied and only slightly modified out of
// LEB128.h.
-/// \brief Adds the unsigned in \p Value to the hash encoded as a ULEB128.
+/// Adds the unsigned in \p Value to the hash encoded as a ULEB128.
void DIEHash::addULEB128(uint64_t Value) {
- DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n");
+ LLVM_DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n");
do {
uint8_t Byte = Value & 0x7f;
Value >>= 7;
@@ -64,7 +64,7 @@ void DIEHash::addULEB128(uint64_t Value) {
}
void DIEHash::addSLEB128(int64_t Value) {
- DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n");
+ LLVM_DEBUG(dbgs() << "Adding SLEB128 " << Value << " to hash.\n");
bool More;
do {
uint8_t Byte = Value & 0x7f;
@@ -77,10 +77,10 @@ void DIEHash::addSLEB128(int64_t Value) {
} while (More);
}
-/// \brief Including \p Parent adds the context of Parent to the hash..
+/// Including \p Parent adds the context of Parent to the hash.
void DIEHash::addParentContext(const DIE &Parent) {
- DEBUG(dbgs() << "Adding parent context to hash...\n");
+ LLVM_DEBUG(dbgs() << "Adding parent context to hash...\n");
// [7.27.2] For each surrounding type or namespace beginning with the
// outermost such construct...
@@ -108,7 +108,7 @@ void DIEHash::addParentContext(const DIE &Parent) {
// ... Then the name, taken from the DW_AT_name attribute.
StringRef Name = getDIEStringAttr(Die, dwarf::DW_AT_name);
- DEBUG(dbgs() << "... adding context: " << Name << "\n");
+ LLVM_DEBUG(dbgs() << "... adding context: " << Name << "\n");
if (!Name.empty())
addString(Name);
}
@@ -118,9 +118,9 @@ void DIEHash::addParentContext(const DIE &Parent) {
void DIEHash::collectAttributes(const DIE &Die, DIEAttrs &Attrs) {
for (const auto &V : Die.values()) {
- DEBUG(dbgs() << "Attribute: "
- << dwarf::AttributeString(V.getAttribute())
- << " added.\n");
+ LLVM_DEBUG(dbgs() << "Attribute: "
+ << dwarf::AttributeString(V.getAttribute())
+ << " added.\n");
switch (V.getAttribute()) {
#define HANDLE_DIE_HASH_ATTR(NAME) \
case dwarf::NAME: \
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
index 29337ae38a99..dae517ab2c29 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
@@ -23,7 +23,7 @@ namespace llvm {
class AsmPrinter;
class CompileUnit;
-/// \brief An object containing the capability of hashing and adding hash
+/// An object containing the capability of hashing and adding hash
/// attributes onto a DIE.
class DIEHash {
// Collection of all attributes used in hashing a particular DIE.
@@ -35,66 +35,66 @@ class DIEHash {
public:
DIEHash(AsmPrinter *A = nullptr) : AP(A) {}
- /// \brief Computes the CU signature.
+ /// Computes the CU signature.
uint64_t computeCUSignature(StringRef DWOName, const DIE &Die);
- /// \brief Computes the type signature.
+ /// Computes the type signature.
uint64_t computeTypeSignature(const DIE &Die);
// Helper routines to process parts of a DIE.
private:
- /// \brief Adds the parent context of \param Die to the hash.
- void addParentContext(const DIE &Die);
+ /// Adds the parent context of \param Parent to the hash.
+ void addParentContext(const DIE &Parent);
- /// \brief Adds the attributes of \param Die to the hash.
+ /// Adds the attributes of \param Die to the hash.
void addAttributes(const DIE &Die);
- /// \brief Computes the full DWARF4 7.27 hash of the DIE.
+ /// Computes the full DWARF4 7.27 hash of the DIE.
void computeHash(const DIE &Die);
// Routines that add DIEValues to the hash.
public:
- /// \brief Adds \param Value to the hash.
+ /// Adds \param Value to the hash.
void update(uint8_t Value) { Hash.update(Value); }
- /// \brief Encodes and adds \param Value to the hash as a ULEB128.
+ /// Encodes and adds \param Value to the hash as a ULEB128.
void addULEB128(uint64_t Value);
- /// \brief Encodes and adds \param Value to the hash as a SLEB128.
+ /// Encodes and adds \param Value to the hash as a SLEB128.
void addSLEB128(int64_t Value);
private:
- /// \brief Adds \param Str to the hash and includes a NULL byte.
+ /// Adds \param Str to the hash and includes a NULL byte.
void addString(StringRef Str);
- /// \brief Collects the attributes of DIE \param Die into the \param Attrs
+ /// Collects the attributes of DIE \param Die into the \param Attrs
/// structure.
void collectAttributes(const DIE &Die, DIEAttrs &Attrs);
- /// \brief Hashes the attributes in \param Attrs in order.
+ /// Hashes the attributes in \param Attrs in order.
void hashAttributes(const DIEAttrs &Attrs, dwarf::Tag Tag);
- /// \brief Hashes the data in a block like DIEValue, e.g. DW_FORM_block or
+ /// Hashes the data in a block like DIEValue, e.g. DW_FORM_block or
/// DW_FORM_exprloc.
void hashBlockData(const DIE::const_value_range &Values);
- /// \brief Hashes the contents pointed to in the .debug_loc section.
+ /// Hashes the contents pointed to in the .debug_loc section.
void hashLocList(const DIELocList &LocList);
- /// \brief Hashes an individual attribute.
+ /// Hashes an individual attribute.
void hashAttribute(const DIEValue &Value, dwarf::Tag Tag);
- /// \brief Hashes an attribute that refers to another DIE.
+ /// Hashes an attribute that refers to another DIE.
void hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag,
const DIE &Entry);
- /// \brief Hashes a reference to a named type in such a way that is
+ /// Hashes a reference to a named type in such a way that is
/// independent of whether that type is described by a declaration or a
/// definition.
void hashShallowTypeReference(dwarf::Attribute Attribute, const DIE &Entry,
StringRef Name);
- /// \brief Hashes a reference to a previously referenced type DIE.
+ /// Hashes a reference to a previously referenced type DIE.
void hashRepeatedTypeReference(dwarf::Attribute Attribute,
unsigned DieNumber);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
index 856758c8e4f6..25518a339c61 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
@@ -31,7 +31,7 @@ using namespace llvm;
#define DEBUG_TYPE "dwarfdebug"
-// \brief If @MI is a DBG_VALUE with debug value described by a
+// If @MI is a DBG_VALUE with debug value described by a
// defined register, returns the number of this register.
// In the other case, returns 0.
static unsigned isDescribedByReg(const MachineInstr &MI) {
@@ -50,8 +50,8 @@ void DbgValueHistoryMap::startInstrRange(InlinedVariable Var,
auto &Ranges = VarInstrRanges[Var];
if (!Ranges.empty() && Ranges.back().second == nullptr &&
Ranges.back().first->isIdenticalTo(MI)) {
- DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n"
- << "\t" << Ranges.back().first << "\t" << MI << "\n");
+ LLVM_DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n"
+ << "\t" << Ranges.back().first << "\t" << MI << "\n");
return;
}
Ranges.push_back(std::make_pair(&MI, nullptr));
@@ -86,7 +86,7 @@ using RegDescribedVarsMap = std::map<unsigned, SmallVector<InlinedVariable, 1>>;
} // end anonymous namespace
-// \brief Claim that @Var is not described by @RegNo anymore.
+// Claim that @Var is not described by @RegNo anymore.
static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,
InlinedVariable Var) {
const auto &I = RegVars.find(RegNo);
@@ -100,7 +100,7 @@ static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,
RegVars.erase(I);
}
-// \brief Claim that @Var is now described by @RegNo.
+// Claim that @Var is now described by @RegNo.
static void addRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,
InlinedVariable Var) {
assert(RegNo != 0U);
@@ -109,7 +109,7 @@ static void addRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,
VarSet.push_back(Var);
}
-// \brief Terminate the location range for variables described by register at
+// Terminate the location range for variables described by register at
// @I by inserting @ClobberingInstr to their history.
static void clobberRegisterUses(RegDescribedVarsMap &RegVars,
RegDescribedVarsMap::iterator I,
@@ -122,7 +122,7 @@ static void clobberRegisterUses(RegDescribedVarsMap &RegVars,
RegVars.erase(I);
}
-// \brief Terminate the location range for variables described by register
+// Terminate the location range for variables described by register
// @RegNo by inserting @ClobberingInstr to their history.
static void clobberRegisterUses(RegDescribedVarsMap &RegVars, unsigned RegNo,
DbgValueHistoryMap &HistMap,
@@ -133,7 +133,7 @@ static void clobberRegisterUses(RegDescribedVarsMap &RegVars, unsigned RegNo,
clobberRegisterUses(RegVars, I, HistMap, ClobberingInstr);
}
-// \brief Returns the first instruction in @MBB which corresponds to
+// Returns the first instruction in @MBB which corresponds to
// the function epilogue, or nullptr if @MBB doesn't contain an epilogue.
static const MachineInstr *getFirstEpilogueInst(const MachineBasicBlock &MBB) {
auto LastMI = MBB.getLastNonDebugInstr();
@@ -155,7 +155,7 @@ static const MachineInstr *getFirstEpilogueInst(const MachineBasicBlock &MBB) {
return &*MBB.begin();
}
-// \brief Collect registers that are modified in the function body (their
+// Collect registers that are modified in the function body (their
// contents is changed outside of the prologue and epilogue).
static void collectChangingRegs(const MachineFunction *MF,
const TargetRegisterInfo *TRI,
@@ -198,7 +198,7 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF,
RegDescribedVarsMap RegVars;
for (const auto &MBB : *MF) {
for (const auto &MI : MBB) {
- if (!MI.isDebugValue()) {
+ if (!MI.isDebugInstr()) {
// Not a DBG_VALUE instruction. It may clobber registers which describe
// some variables.
for (const MachineOperand &MO : MI.operands()) {
@@ -234,6 +234,10 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF,
continue;
}
+ // Skip DBG_LABEL instructions.
+ if (MI.isDebugLabel())
+ continue;
+
assert(MI.getNumOperands() > 1 && "Invalid DBG_VALUE instruction!");
// Use the base variable (without any DW_OP_piece expressions)
// as index into History. The full variables including the
@@ -265,3 +269,33 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF,
}
}
}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void DbgValueHistoryMap::dump() const {
+ dbgs() << "DbgValueHistoryMap:\n";
+ for (const auto &VarRangePair : *this) {
+ const InlinedVariable &Var = VarRangePair.first;
+ const InstrRanges &Ranges = VarRangePair.second;
+
+ const DILocalVariable *LocalVar = Var.first;
+ const DILocation *Location = Var.second;
+
+ dbgs() << " - " << LocalVar->getName() << " at ";
+
+ if (Location)
+ dbgs() << Location->getFilename() << ":" << Location->getLine() << ":"
+ << Location->getColumn();
+ else
+ dbgs() << "<unknown location>";
+
+ dbgs() << " --\n";
+
+ for (const InstrRange &Range : Ranges) {
+ dbgs() << " Begin: " << *Range.first;
+ if (Range.second)
+ dbgs() << " End : " << *Range.second;
+ dbgs() << "\n";
+ }
+ }
+}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
index a7b0562e8102..a262cb38b175 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
@@ -52,6 +52,10 @@ public:
void clear() { VarInstrRanges.clear(); }
InstrRangesMap::const_iterator begin() const { return VarInstrRanges.begin(); }
InstrRangesMap::const_iterator end() const { return VarInstrRanges.end(); }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const;
+#endif
};
void calculateDbgValueHistory(const MachineFunction *MF,
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index 2e5c22447936..82e14dc13cb1 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -25,6 +25,8 @@
using namespace llvm;
+#define DEBUG_TYPE "dwarfdebug"
+
Optional<DbgVariableLocation>
DbgVariableLocation::extractFromMachineInstruction(
const MachineInstr &Instruction) {
@@ -123,29 +125,6 @@ MCSymbol *DebugHandlerBase::getLabelAfterInsn(const MachineInstr *MI) {
return LabelsAfterInsn.lookup(MI);
}
-int DebugHandlerBase::fragmentCmp(const DIExpression *P1,
- const DIExpression *P2) {
- auto Fragment1 = *P1->getFragmentInfo();
- auto Fragment2 = *P2->getFragmentInfo();
- unsigned l1 = Fragment1.OffsetInBits;
- unsigned l2 = Fragment2.OffsetInBits;
- unsigned r1 = l1 + Fragment1.SizeInBits;
- unsigned r2 = l2 + Fragment2.SizeInBits;
- if (r1 <= l2)
- return -1;
- else if (r2 <= l1)
- return 1;
- else
- return 0;
-}
-
-bool DebugHandlerBase::fragmentsOverlap(const DIExpression *P1,
- const DIExpression *P2) {
- if (!P1->isFragment() || !P2->isFragment())
- return true;
- return fragmentCmp(P1, P2) == 0;
-}
-
/// If this type is derived from a base type then return base type size.
uint64_t DebugHandlerBase::getBaseTypeSize(const DITypeRef TyRef) {
DIType *Ty = TyRef.resolve();
@@ -213,6 +192,7 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
assert(DbgValues.empty() && "DbgValues map wasn't cleaned!");
calculateDbgValueHistory(MF, Asm->MF->getSubtarget().getRegisterInfo(),
DbgValues);
+ LLVM_DEBUG(DbgValues.dump());
// Request labels for the full history.
for (const auto &I : DbgValues) {
@@ -232,8 +212,8 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
const DIExpression *Fragment = I->first->getDebugExpression();
if (std::all_of(Ranges.begin(), I,
[&](DbgValueHistoryMap::InstrRange Pred) {
- return !fragmentsOverlap(
- Fragment, Pred.first->getDebugExpression());
+ return !Fragment->fragmentsOverlap(
+ Pred.first->getDebugExpression());
}))
LabelsBeforeInsn[I->first] = Asm->getFunctionBegin();
else
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h
index 245d70038de9..1ccefe32be75 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h
@@ -122,14 +122,6 @@ public:
/// Return Label immediately following the instruction.
MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
- /// Determine the relative position of the fragments described by P1 and P2.
- /// Returns -1 if P1 is entirely before P2, 0 if P1 and P2 overlap, 1 if P1 is
- /// entirely after P2.
- static int fragmentCmp(const DIExpression *P1, const DIExpression *P2);
-
- /// Determine whether two variable fragments overlap.
- static bool fragmentsOverlap(const DIExpression *P1, const DIExpression *P2);
-
/// If this type is derived from a base type then return base type size.
static uint64_t getBaseTypeSize(const DITypeRef TyRef);
};
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index 3d6d8a76529c..ac49657b68fa 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -11,6 +11,7 @@
#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H
#include "DebugLocStream.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/MC/MCSymbol.h"
@@ -20,7 +21,7 @@
namespace llvm {
class AsmPrinter;
-/// \brief This struct describes location entries emitted in the .debug_loc
+/// This struct describes location entries emitted in the .debug_loc
/// section.
class DebugLocEntry {
/// Begin and end symbols for the address range that this location is valid.
@@ -28,7 +29,7 @@ class DebugLocEntry {
const MCSymbol *End;
public:
- /// \brief A single location or constant.
+ /// A single location or constant.
struct Value {
Value(const DIExpression *Expr, int64_t i)
: Expression(Expr), EntryKind(E_Integer) {
@@ -105,13 +106,13 @@ public:
Values.push_back(std::move(Val));
}
- /// \brief If this and Next are describing different pieces of the same
+ /// If this and Next are describing different pieces of the same
/// variable, merge them by appending Next's values to the current
/// list of values.
/// Return true if the merge was successful.
bool MergeValues(const DebugLocEntry &Next);
- /// \brief Attempt to merge this DebugLocEntry with Next and return
+ /// Attempt to merge this DebugLocEntry with Next and return
/// true if the merge was successful. Entries can be merged if they
/// share the same Loc/Constant and if Next immediately follows this
/// Entry.
@@ -135,10 +136,10 @@ public:
}) && "value must be a piece");
}
- // \brief Sort the pieces by offset.
+ // Sort the pieces by offset.
// Remove any duplicate entries by dropping all but the first.
void sortUniqueValues() {
- std::sort(Values.begin(), Values.end());
+ llvm::sort(Values.begin(), Values.end());
Values.erase(
std::unique(
Values.begin(), Values.end(), [](const Value &A, const Value &B) {
@@ -147,12 +148,12 @@ public:
Values.end());
}
- /// \brief Lower this entry into a DWARF expression.
+ /// Lower this entry into a DWARF expression.
void finalize(const AsmPrinter &AP, DebugLocStream::ListBuilder &List,
const DIBasicType *BT);
};
-/// \brief Compare two Values for equality.
+/// Compare two Values for equality.
inline bool operator==(const DebugLocEntry::Value &A,
const DebugLocEntry::Value &B) {
if (A.EntryKind != B.EntryKind)
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
index 0c551dfff9cc..8dcf5cbc1889 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
@@ -22,7 +22,7 @@ class DwarfCompileUnit;
class MachineInstr;
class MCSymbol;
-/// \brief Byte stream of .debug_loc entries.
+/// Byte stream of .debug_loc entries.
///
/// Stores a unified stream of .debug_loc entries. There's \a List for each
/// variable/inlined-at pair, and an \a Entry for each \a DebugLocEntry.
@@ -55,7 +55,7 @@ private:
SmallString<256> DWARFBytes;
SmallVector<std::string, 32> Comments;
- /// \brief Only verbose textual output needs comments. This will be set to
+ /// Only verbose textual output needs comments. This will be set to
/// true for that case, and false otherwise.
bool GenerateComments;
@@ -69,7 +69,7 @@ public:
class EntryBuilder;
private:
- /// \brief Start a new .debug_loc entry list.
+ /// Start a new .debug_loc entry list.
///
/// Start a new .debug_loc entry list. Return the new list's index so it can
/// be retrieved later via \a getList().
@@ -89,7 +89,7 @@ private:
/// \return false iff the list is deleted.
bool finalizeList(AsmPrinter &Asm);
- /// \brief Start a new .debug_loc entry.
+ /// Start a new .debug_loc entry.
///
/// Until the next call, bytes added to the stream will be added to this
/// entry.
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
deleted file mode 100644
index c21b3d3451ad..000000000000
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
+++ /dev/null
@@ -1,293 +0,0 @@
-//===- llvm/CodeGen/DwarfAccelTable.cpp - Dwarf Accelerator Tables --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains support for writing dwarf accelerator tables.
-//
-//===----------------------------------------------------------------------===//
-
-#include "DwarfAccelTable.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/BinaryFormat/Dwarf.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/DIE.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <cassert>
-#include <cstddef>
-#include <cstdint>
-#include <iterator>
-#include <limits>
-#include <vector>
-
-using namespace llvm;
-
-// The length of the header data is always going to be 4 + 4 + 4*NumAtoms.
-DwarfAccelTable::DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom> atomList)
- : Header(8 + (atomList.size() * 4)), HeaderData(atomList),
- Entries(Allocator) {}
-
-void DwarfAccelTable::AddName(DwarfStringPoolEntryRef Name, const DIE *die,
- char Flags) {
- assert(Data.empty() && "Already finalized!");
- // If the string is in the list already then add this die to the list
- // otherwise add a new one.
- DataArray &DIEs = Entries[Name.getString()];
- assert(!DIEs.Name || DIEs.Name == Name);
- DIEs.Name = Name;
- DIEs.Values.push_back(new (Allocator) HashDataContents(die, Flags));
-}
-
-void DwarfAccelTable::ComputeBucketCount() {
- // First get the number of unique hashes.
- std::vector<uint32_t> uniques(Data.size());
- for (size_t i = 0, e = Data.size(); i < e; ++i)
- uniques[i] = Data[i]->HashValue;
- array_pod_sort(uniques.begin(), uniques.end());
- std::vector<uint32_t>::iterator p =
- std::unique(uniques.begin(), uniques.end());
- uint32_t num = std::distance(uniques.begin(), p);
-
- // Then compute the bucket size, minimum of 1 bucket.
- if (num > 1024)
- Header.bucket_count = num / 4;
- else if (num > 16)
- Header.bucket_count = num / 2;
- else
- Header.bucket_count = num > 0 ? num : 1;
-
- Header.hashes_count = num;
-}
-
-// compareDIEs - comparison predicate that sorts DIEs by their offset.
-static bool compareDIEs(const DwarfAccelTable::HashDataContents *A,
- const DwarfAccelTable::HashDataContents *B) {
- return A->Die->getOffset() < B->Die->getOffset();
-}
-
-void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, StringRef Prefix) {
- // Create the individual hash data outputs.
- Data.reserve(Entries.size());
- for (StringMap<DataArray>::iterator EI = Entries.begin(), EE = Entries.end();
- EI != EE; ++EI) {
-
- // Unique the entries.
- std::stable_sort(EI->second.Values.begin(), EI->second.Values.end(), compareDIEs);
- EI->second.Values.erase(
- std::unique(EI->second.Values.begin(), EI->second.Values.end()),
- EI->second.Values.end());
-
- HashData *Entry = new (Allocator) HashData(EI->getKey(), EI->second);
- Data.push_back(Entry);
- }
-
- // Figure out how many buckets we need, then compute the bucket
- // contents and the final ordering. We'll emit the hashes and offsets
- // by doing a walk during the emission phase. We add temporary
- // symbols to the data so that we can reference them during the offset
- // later, we'll emit them when we emit the data.
- ComputeBucketCount();
-
- // Compute bucket contents and final ordering.
- Buckets.resize(Header.bucket_count);
- for (size_t i = 0, e = Data.size(); i < e; ++i) {
- uint32_t bucket = Data[i]->HashValue % Header.bucket_count;
- Buckets[bucket].push_back(Data[i]);
- Data[i]->Sym = Asm->createTempSymbol(Prefix);
- }
-
- // Sort the contents of the buckets by hash value so that hash
- // collisions end up together. Stable sort makes testing easier and
- // doesn't cost much more.
- for (size_t i = 0; i < Buckets.size(); ++i)
- std::stable_sort(Buckets[i].begin(), Buckets[i].end(),
- [] (HashData *LHS, HashData *RHS) {
- return LHS->HashValue < RHS->HashValue;
- });
-}
-
-// Emits the header for the table via the AsmPrinter.
-void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) {
- Asm->OutStreamer->AddComment("Header Magic");
- Asm->EmitInt32(Header.magic);
- Asm->OutStreamer->AddComment("Header Version");
- Asm->EmitInt16(Header.version);
- Asm->OutStreamer->AddComment("Header Hash Function");
- Asm->EmitInt16(Header.hash_function);
- Asm->OutStreamer->AddComment("Header Bucket Count");
- Asm->EmitInt32(Header.bucket_count);
- Asm->OutStreamer->AddComment("Header Hash Count");
- Asm->EmitInt32(Header.hashes_count);
- Asm->OutStreamer->AddComment("Header Data Length");
- Asm->EmitInt32(Header.header_data_len);
- Asm->OutStreamer->AddComment("HeaderData Die Offset Base");
- Asm->EmitInt32(HeaderData.die_offset_base);
- Asm->OutStreamer->AddComment("HeaderData Atom Count");
- Asm->EmitInt32(HeaderData.Atoms.size());
- for (size_t i = 0; i < HeaderData.Atoms.size(); i++) {
- Atom A = HeaderData.Atoms[i];
- Asm->OutStreamer->AddComment(dwarf::AtomTypeString(A.type));
- Asm->EmitInt16(A.type);
- Asm->OutStreamer->AddComment(dwarf::FormEncodingString(A.form));
- Asm->EmitInt16(A.form);
- }
-}
-
-// Walk through and emit the buckets for the table. Each index is
-// an offset into the list of hashes.
-void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) {
- unsigned index = 0;
- for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
- Asm->OutStreamer->AddComment("Bucket " + Twine(i));
- if (!Buckets[i].empty())
- Asm->EmitInt32(index);
- else
- Asm->EmitInt32(std::numeric_limits<uint32_t>::max());
- // Buckets point in the list of hashes, not to the data. Do not
- // increment the index multiple times in case of hash collisions.
- uint64_t PrevHash = std::numeric_limits<uint64_t>::max();
- for (auto *HD : Buckets[i]) {
- uint32_t HashValue = HD->HashValue;
- if (PrevHash != HashValue)
- ++index;
- PrevHash = HashValue;
- }
- }
-}
-
-// Walk through the buckets and emit the individual hashes for each
-// bucket.
-void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) {
- uint64_t PrevHash = std::numeric_limits<uint64_t>::max();
- for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
- for (HashList::const_iterator HI = Buckets[i].begin(),
- HE = Buckets[i].end();
- HI != HE; ++HI) {
- uint32_t HashValue = (*HI)->HashValue;
- if (PrevHash == HashValue)
- continue;
- Asm->OutStreamer->AddComment("Hash in Bucket " + Twine(i));
- Asm->EmitInt32(HashValue);
- PrevHash = HashValue;
- }
- }
-}
-
-// Walk through the buckets and emit the individual offsets for each
-// element in each bucket. This is done via a symbol subtraction from the
-// beginning of the section. The non-section symbol will be output later
-// when we emit the actual data.
-void DwarfAccelTable::emitOffsets(AsmPrinter *Asm, const MCSymbol *SecBegin) {
- uint64_t PrevHash = std::numeric_limits<uint64_t>::max();
- for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
- for (HashList::const_iterator HI = Buckets[i].begin(),
- HE = Buckets[i].end();
- HI != HE; ++HI) {
- uint32_t HashValue = (*HI)->HashValue;
- if (PrevHash == HashValue)
- continue;
- PrevHash = HashValue;
- Asm->OutStreamer->AddComment("Offset in Bucket " + Twine(i));
- MCContext &Context = Asm->OutStreamer->getContext();
- const MCExpr *Sub = MCBinaryExpr::createSub(
- MCSymbolRefExpr::create((*HI)->Sym, Context),
- MCSymbolRefExpr::create(SecBegin, Context), Context);
- Asm->OutStreamer->EmitValue(Sub, sizeof(uint32_t));
- }
- }
-}
-
-// Walk through the buckets and emit the full data for each element in
-// the bucket. For the string case emit the dies and the various offsets.
-// Terminate each HashData bucket with 0.
-void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) {
- for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
- uint64_t PrevHash = std::numeric_limits<uint64_t>::max();
- for (HashList::const_iterator HI = Buckets[i].begin(),
- HE = Buckets[i].end();
- HI != HE; ++HI) {
- // Terminate the previous entry if there is no hash collision
- // with the current one.
- if (PrevHash != std::numeric_limits<uint64_t>::max() &&
- PrevHash != (*HI)->HashValue)
- Asm->EmitInt32(0);
- // Remember to emit the label for our offset.
- Asm->OutStreamer->EmitLabel((*HI)->Sym);
- Asm->OutStreamer->AddComment((*HI)->Str);
- Asm->emitDwarfStringOffset((*HI)->Data.Name);
- Asm->OutStreamer->AddComment("Num DIEs");
- Asm->EmitInt32((*HI)->Data.Values.size());
- for (HashDataContents *HD : (*HI)->Data.Values) {
- // Emit the DIE offset
- Asm->EmitInt32(HD->Die->getDebugSectionOffset());
- // If we have multiple Atoms emit that info too.
- // FIXME: A bit of a hack, we either emit only one atom or all info.
- if (HeaderData.Atoms.size() > 1) {
- Asm->EmitInt16(HD->Die->getTag());
- Asm->EmitInt8(HD->Flags);
- }
- }
- PrevHash = (*HI)->HashValue;
- }
- // Emit the final end marker for the bucket.
- if (!Buckets[i].empty())
- Asm->EmitInt32(0);
- }
-}
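
For a single hash chain, the byte stream produced by EmitData() has the shape sketched below. This is a hand-written illustration rather than patch output: the string and DIE offsets are invented, the byte order is fixed to little-endian for simplicity (real emission follows the target), and the per-DIE tag/flags pair that a multi-atom table such as .apple_types would add is left out.

#include <cstdint>
#include <vector>

// Append one little-endian 32-bit word, standing in for Asm->EmitInt32().
static void emitInt32(std::vector<uint8_t> &Out, uint32_t V) {
  for (int I = 0; I < 4; ++I)
    Out.push_back(uint8_t(V >> (8 * I)));
}

// One hash chain holding two colliding names ("foo" and "bar") in a
// single-atom, .apple_names-style table. All offsets are invented.
static std::vector<uint8_t> buildChain() {
  std::vector<uint8_t> Out;
  emitInt32(Out, 0x120); // .debug_str offset of "foo"
  emitInt32(Out, 1);     // Num DIEs for "foo"
  emitInt32(Out, 0x2a);  // DIE offset
  emitInt32(Out, 0x12c); // .debug_str offset of "bar" (same hash value)
  emitInt32(Out, 1);     // Num DIEs for "bar"
  emitInt32(Out, 0x131); // DIE offset
  emitInt32(Out, 0);     // chain terminator, as emitted by EmitData()
  return Out;
}

int main() {
  // Seven 32-bit words in total.
  return buildChain().size() == 28 ? 0 : 1;
}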
-
-// Emit the entire data structure to the output file.
-void DwarfAccelTable::emit(AsmPrinter *Asm, const MCSymbol *SecBegin,
- DwarfDebug *D) {
- // Emit the header.
- EmitHeader(Asm);
-
- // Emit the buckets.
- EmitBuckets(Asm);
-
- // Emit the hashes.
- EmitHashes(Asm);
-
- // Emit the offsets.
- emitOffsets(Asm, SecBegin);
-
- // Emit the hash data.
- EmitData(Asm, D);
-}
-
-#ifndef NDEBUG
-void DwarfAccelTable::print(raw_ostream &OS) {
- Header.print(OS);
- HeaderData.print(OS);
-
- OS << "Entries: \n";
- for (StringMap<DataArray>::const_iterator EI = Entries.begin(),
- EE = Entries.end();
- EI != EE; ++EI) {
- OS << "Name: " << EI->getKeyData() << "\n";
- for (HashDataContents *HD : EI->second.Values)
- HD->print(OS);
- }
-
- OS << "Buckets and Hashes: \n";
- for (size_t i = 0, e = Buckets.size(); i < e; ++i)
- for (HashList::const_iterator HI = Buckets[i].begin(),
- HE = Buckets[i].end();
- HI != HE; ++HI)
- (*HI)->print(OS);
-
- OS << "Data: \n";
- for (std::vector<HashData *>::const_iterator DI = Data.begin(),
- DE = Data.end();
- DI != DE; ++DI)
- (*DI)->print(OS);
-}
-#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
deleted file mode 100644
index f56199dc8e72..000000000000
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
+++ /dev/null
@@ -1,261 +0,0 @@
-//==- llvm/CodeGen/DwarfAccelTable.h - Dwarf Accelerator Tables --*- C++ -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains support for writing dwarf accelerator tables.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFACCELTABLE_H
-#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFACCELTABLE_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/BinaryFormat/Dwarf.h"
-#include "llvm/CodeGen/DIE.h"
-#include "llvm/CodeGen/DwarfStringPoolEntry.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/Allocator.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cstddef>
-#include <cstdint>
-#include <vector>
-
-// The dwarf accelerator tables are an indirect hash table optimized
-// for null lookup rather than access to known data. They are output into
-// an on-disk format that looks like this:
-//
-// .-------------.
-// | HEADER |
-// |-------------|
-// | BUCKETS |
-// |-------------|
-// | HASHES |
-// |-------------|
-// | OFFSETS |
-// |-------------|
-// | DATA |
-// `-------------'
-//
-// where the header contains a magic number, version, type of hash function,
-// the number of buckets, total number of hashes, and room for a special
-// struct of data and the length of that struct.
-//
-// The buckets contain an index (e.g. 6) into the hashes array. The hashes
-// section contains all of the 32-bit hash values in contiguous memory, and
-// the offsets contain the offset into the data area for the particular
-// hash.
-//
-// For a lookup example, we could hash a function name and take it modulo the
-// number of buckets giving us our bucket. From there we take the bucket value
-// as an index into the hashes table and look at each successive hash as long
-// as the hash value is still the same modulo result (bucket value) as earlier.
-// If we have a match we look at that same entry in the offsets table and
-// grab the offset in the data for our final match.
-
-namespace llvm {
-
-class AsmPrinter;
-class DwarfDebug;
-
-class DwarfAccelTable {
- // Helper function to compute the number of buckets needed based on
- // the number of unique hashes.
- void ComputeBucketCount();
-
- struct TableHeader {
- uint32_t magic = MagicHash; // 'HASH' magic value to allow endian detection
- uint16_t version = 1; // Version number.
- uint16_t hash_function = dwarf::DW_hash_function_djb;
- // The hash function enumeration that was used.
- uint32_t bucket_count = 0; // The number of buckets in this hash table.
- uint32_t hashes_count = 0; // The total number of unique hash values
- // and hash data offsets in this table.
- uint32_t header_data_len; // The bytes to skip to get to the hash
- // indexes (buckets) for correct alignment.
- // Also written to disk is the implementation specific header data.
-
- static const uint32_t MagicHash = 0x48415348;
-
- TableHeader(uint32_t data_len) : header_data_len(data_len) {}
-
-#ifndef NDEBUG
- void print(raw_ostream &OS) {
- OS << "Magic: " << format("0x%x", magic) << "\n"
- << "Version: " << version << "\n"
- << "Hash Function: " << hash_function << "\n"
- << "Bucket Count: " << bucket_count << "\n"
- << "Header Data Length: " << header_data_len << "\n";
- }
-
- void dump() { print(dbgs()); }
-#endif
- };
-
-public:
- // The HeaderData describes the form of each set of data. In general this
- // is as a list of atoms (atom_count) where each atom contains a type
- // (AtomType type) of data, and an encoding form (form). In the case of
- // data that is referenced via DW_FORM_ref_* the die_offset_base is
- // used to describe the offset for all forms in the list of atoms.
- // This also serves as a public interface of sorts.
- // When written to disk this will have the form:
- //
- // uint32_t die_offset_base
- // uint32_t atom_count
- // atom_count Atoms
-
- // Make these public so that they can be used as a general interface to
- // the class.
- struct Atom {
- uint16_t type; // enum AtomType
- uint16_t form; // DWARF DW_FORM_ defines
-
- constexpr Atom(uint16_t type, uint16_t form) : type(type), form(form) {}
-
-#ifndef NDEBUG
- void print(raw_ostream &OS) {
- OS << "Type: " << dwarf::AtomTypeString(type) << "\n"
- << "Form: " << dwarf::FormEncodingString(form) << "\n";
- }
-
- void dump() { print(dbgs()); }
-#endif
- };
-
-private:
- struct TableHeaderData {
- uint32_t die_offset_base;
- SmallVector<Atom, 3> Atoms;
-
- TableHeaderData(ArrayRef<Atom> AtomList, uint32_t offset = 0)
- : die_offset_base(offset), Atoms(AtomList.begin(), AtomList.end()) {}
-
-#ifndef NDEBUG
- void print(raw_ostream &OS) {
- OS << "die_offset_base: " << die_offset_base << "\n";
- for (size_t i = 0; i < Atoms.size(); i++)
- Atoms[i].print(OS);
- }
-
- void dump() { print(dbgs()); }
-#endif
- };
-
- // The data itself consists of a str_offset, a count of the DIEs in the
- // hash and the offsets to the DIEs themselves.
- // On disk each data section is ended with a 0 KeyType as the end of the
- // hash chain.
- // On output this looks like:
- // uint32_t str_offset
- // uint32_t hash_data_count
- // HashData[hash_data_count]
-public:
- struct HashDataContents {
- const DIE *Die; // Offsets
- char Flags; // Specific flags to output
-
- HashDataContents(const DIE *D, char Flags) : Die(D), Flags(Flags) {}
-
-#ifndef NDEBUG
- void print(raw_ostream &OS) const {
- OS << " Offset: " << Die->getOffset() << "\n"
- << " Tag: " << dwarf::TagString(Die->getTag()) << "\n"
- << " Flags: " << Flags << "\n";
- }
-#endif
- };
-
-private:
- // String Data
- struct DataArray {
- DwarfStringPoolEntryRef Name;
- std::vector<HashDataContents *> Values;
- };
-
- friend struct HashData;
-
- struct HashData {
- StringRef Str;
- uint32_t HashValue;
- MCSymbol *Sym;
- DwarfAccelTable::DataArray &Data; // offsets
-
- HashData(StringRef S, DwarfAccelTable::DataArray &Data)
- : Str(S), Data(Data) {
- HashValue = dwarf::djbHash(S);
- }
-
-#ifndef NDEBUG
- void print(raw_ostream &OS) {
- OS << "Name: " << Str << "\n";
- OS << " Hash Value: " << format("0x%x", HashValue) << "\n";
- OS << " Symbol: ";
- if (Sym)
- OS << *Sym;
- else
- OS << "<none>";
- OS << "\n";
- for (HashDataContents *C : Data.Values) {
- OS << " Offset: " << C->Die->getOffset() << "\n";
- OS << " Tag: " << dwarf::TagString(C->Die->getTag()) << "\n";
- OS << " Flags: " << C->Flags << "\n";
- }
- }
-
- void dump() { print(dbgs()); }
-#endif
- };
-
- // Internal Functions
- void EmitHeader(AsmPrinter *);
- void EmitBuckets(AsmPrinter *);
- void EmitHashes(AsmPrinter *);
- void emitOffsets(AsmPrinter *, const MCSymbol *);
- void EmitData(AsmPrinter *, DwarfDebug *D);
-
- // Allocator for HashData and HashDataContents.
- BumpPtrAllocator Allocator;
-
- // Output Variables
- TableHeader Header;
- TableHeaderData HeaderData;
- std::vector<HashData *> Data;
-
- using StringEntries = StringMap<DataArray, BumpPtrAllocator &>;
-
- StringEntries Entries;
-
- // Buckets/Hashes/Offsets
- using HashList = std::vector<HashData *>;
- using BucketList = std::vector<HashList>;
- BucketList Buckets;
- HashList Hashes;
-
- // Public Implementation
-public:
- DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom>);
- DwarfAccelTable(const DwarfAccelTable &) = delete;
- DwarfAccelTable &operator=(const DwarfAccelTable &) = delete;
-
- void AddName(DwarfStringPoolEntryRef Name, const DIE *Die, char Flags = 0);
- void FinalizeTable(AsmPrinter *, StringRef);
- void emit(AsmPrinter *, const MCSymbol *, DwarfDebug *);
-#ifndef NDEBUG
- void print(raw_ostream &OS);
- void dump() { print(dbgs()); }
-#endif
-};
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DWARFACCELTABLE_H
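
To make the lookup procedure described in the file comment above concrete, here is a minimal consumer-side sketch. It is not part of this change and is not how any particular debugger implements the search; it assumes the buckets, hashes and offsets arrays have already been loaded from the section, that the producer used DW_hash_function_djb, and that each offset is relative to the start of the section, as emitOffsets() arranges.

#include <cstdint>
#include <string>
#include <vector>

// DJB hash, matching dwarf::djbHash / DW_hash_function_djb.
static uint32_t djbHash(const std::string &S) {
  uint32_t H = 5381;
  for (unsigned char C : S)
    H = H * 33 + C;
  return H;
}

// Return the data-area offsets whose hash matches Name; the caller still
// has to compare the stored string, since only the 32-bit hash is indexed.
static std::vector<uint32_t> lookup(const std::string &Name,
                                    const std::vector<uint32_t> &Buckets,
                                    const std::vector<uint32_t> &Hashes,
                                    const std::vector<uint32_t> &Offsets) {
  std::vector<uint32_t> Result;
  uint32_t Hash = djbHash(Name);
  uint32_t BucketIdx = Hash % Buckets.size();
  uint32_t HashIdx = Buckets[BucketIdx];
  if (HashIdx == UINT32_MAX) // empty bucket
    return Result;
  // Scan successive hashes as long as they still belong to this bucket.
  for (uint32_t I = HashIdx;
       I < Hashes.size() && Hashes[I] % Buckets.size() == BucketIdx; ++I)
    if (Hashes[I] == Hash)
      Result.push_back(Offsets[I]);
  return Result;
}

int main() {
  // Smoke test: a table with a single empty bucket never matches anything.
  return lookup("main", {UINT32_MAX}, {}, {}).size();
}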
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index cbb4c48b4d88..1990456cc555 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -17,7 +17,6 @@
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/TargetLoweringObjectFile.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
@@ -30,6 +29,7 @@
#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index c8cd8eb8ffd3..32271a0ef24a 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -28,7 +28,6 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
-#include "llvm/CodeGen/TargetLoweringObjectFile.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
@@ -40,6 +39,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
@@ -94,16 +94,18 @@ void DwarfCompileUnit::addLocalLabelAddress(DIE &Die,
DIEInteger(0));
}
-unsigned DwarfCompileUnit::getOrCreateSourceID(StringRef FileName,
- StringRef DirName) {
+unsigned DwarfCompileUnit::getOrCreateSourceID(const DIFile *File) {
// If we print assembly, we can't separate .file entries according to
// compile units. Thus all files will belong to the default compile unit.
// FIXME: add a better feature test than hasRawTextSupport. Even better,
// extend .file to support this.
+ unsigned CUID = Asm->OutStreamer->hasRawTextSupport() ? 0 : getUniqueID();
+ if (!File)
+ return Asm->OutStreamer->EmitDwarfFileDirective(0, "", "", nullptr, None, CUID);
return Asm->OutStreamer->EmitDwarfFileDirective(
- 0, DirName, FileName,
- Asm->OutStreamer->hasRawTextSupport() ? 0 : getUniqueID());
+ 0, File->getDirectory(), File->getFilename(), getMD5AsBytes(File),
+ File->getSource(), CUID);
}
DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
@@ -190,10 +192,13 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
DwarfExpr = llvm::make_unique<DIEDwarfExpression>(*Asm, *this, *Loc);
}
+ if (Expr)
+ DwarfExpr->addFragmentOffset(Expr);
+
if (Global) {
const MCSymbol *Sym = Asm->getSymbol(Global);
if (Global->isThreadLocal()) {
- if (Asm->TM.Options.EmulatedTLS) {
+ if (Asm->TM.useEmulatedTLS()) {
// TODO: add debug info for emulated thread local mode.
} else {
// FIXME: Make this work with -gsplit-dwarf.
@@ -225,10 +230,13 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
addOpAddress(*Loc, Sym);
}
}
- if (Expr) {
- DwarfExpr->addFragmentOffset(Expr);
- DwarfExpr->addExpression(Expr);
- }
+ // Global variables attached to symbols are memory locations.
+ // It would be better if this were unconditional, but malformed input that
+ // mixes non-fragments and fragments for the same variable is too expensive
+ // to detect in the verifier.
+ if (DwarfExpr->isUnknownLocation())
+ DwarfExpr->setMemoryLocationKind();
+ DwarfExpr->addExpression(Expr);
}
if (Loc)
addBlock(*VariableDIE, dwarf::DW_AT_location, DwarfExpr->finalize());
@@ -241,7 +249,8 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
// If the linkage name is different than the name, go ahead and output
// that as well into the name table.
- if (GV->getLinkageName() != "" && GV->getName() != GV->getLinkageName())
+ if (GV->getLinkageName() != "" && GV->getName() != GV->getLinkageName() &&
+ DD->useAllLinkageNames())
DD->addAccelName(GV->getLinkageName(), *VariableDIE);
}
@@ -267,15 +276,20 @@ void DwarfCompileUnit::addRange(RangeSpan Range) {
void DwarfCompileUnit::initStmtList() {
// Define start line table label for each Compile Unit.
- MCSymbol *LineTableStartSym =
- Asm->OutStreamer->getDwarfLineTableSymbol(getUniqueID());
+ MCSymbol *LineTableStartSym;
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ if (DD->useSectionsAsReferences()) {
+ LineTableStartSym = TLOF.getDwarfLineSection()->getBeginSymbol();
+ } else {
+ LineTableStartSym =
+ Asm->OutStreamer->getDwarfLineTableSymbol(getUniqueID());
+ }
   // DW_AT_stmt_list is an offset of line number information for this
// compile unit in debug_line section. For split dwarf this is
// left in the skeleton CU and so not included.
// The line table entries are not always emitted in assembly, so it
// is not okay to use line_table_start here.
- const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
StmtListValue =
addSectionLabel(getUnitDie(), dwarf::DW_AT_stmt_list, LineTableStartSym,
TLOF.getDwarfLineSection()->getBeginSymbol());
@@ -313,10 +327,16 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
// Only include DW_AT_frame_base in full debug info
if (!includeMinimalInlineScopes()) {
- const TargetRegisterInfo *RI = Asm->MF->getSubtarget().getRegisterInfo();
- MachineLocation Location(RI->getFrameRegister(*Asm->MF));
- if (RI->isPhysicalRegister(Location.getReg()))
- addAddress(*SPDie, dwarf::DW_AT_frame_base, Location);
+ if (Asm->MF->getTarget().getTargetTriple().isNVPTX()) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_call_frame_cfa);
+ addBlock(*SPDie, dwarf::DW_AT_frame_base, Loc);
+ } else {
+ const TargetRegisterInfo *RI = Asm->MF->getSubtarget().getRegisterInfo();
+ MachineLocation Location(RI->getFrameRegister(*Asm->MF));
+ if (RI->isPhysicalRegister(Location.getReg()))
+ addAddress(*SPDie, dwarf::DW_AT_frame_base, Location);
+ }
}
// Add name to the name table, we do this here because we're guaranteed
@@ -385,21 +405,28 @@ void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE,
SmallVector<RangeSpan, 2> Range) {
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
- // Emit offset in .debug_range as a relocatable label. emitDIE will handle
- // emitting it appropriately.
+ // Emit the offset into .debug_ranges or .debug_rnglists as a relocatable
+ // label. emitDIE() will handle emitting it appropriately.
const MCSymbol *RangeSectionSym =
- TLOF.getDwarfRangesSection()->getBeginSymbol();
+ DD->getDwarfVersion() >= 5
+ ? TLOF.getDwarfRnglistsSection()->getBeginSymbol()
+ : TLOF.getDwarfRangesSection()->getBeginSymbol();
RangeSpanList List(Asm->createTempSymbol("debug_ranges"), std::move(Range));
// Under fission, ranges are specified by constant offsets relative to the
// CU's DW_AT_GNU_ranges_base.
- if (isDwoUnit())
- addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(),
- RangeSectionSym);
- else
+ // FIXME: For DWARF v5, do not generate the DW_AT_ranges attribute under
+ // fission until we support the forms using the .debug_addr section
+ // (DW_RLE_startx_endx etc.).
+ if (isDwoUnit()) {
+ if (DD->getDwarfVersion() < 5)
+ addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(),
+ RangeSectionSym);
+ } else {
addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(),
RangeSectionSym);
+ }
// Add the range list to the set of ranges to be emitted.
(Skeleton ? Skeleton : this)->CURangeLists.push_back(std::move(List));
@@ -407,9 +434,10 @@ void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE,
void DwarfCompileUnit::attachRangesOrLowHighPC(
DIE &Die, SmallVector<RangeSpan, 2> Ranges) {
- if (Ranges.size() == 1) {
- const auto &single = Ranges.front();
- attachLowHighPC(Die, single.getStart(), single.getEnd());
+ if (Ranges.size() == 1 || !DD->useRangesSection()) {
+ const RangeSpan &Front = Ranges.front();
+ const RangeSpan &Back = Ranges.back();
+ attachLowHighPC(Die, Front.getStart(), Back.getEnd());
} else
addScopeRangeList(Die, std::move(Ranges));
}
@@ -443,7 +471,7 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) {
// Add the call site information to the DIE.
const DILocation *IA = Scope->getInlinedAt();
addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None,
- getOrCreateSourceID(IA->getFilename(), IA->getDirectory()));
+ getOrCreateSourceID(IA->getFile()));
addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, IA->getLine());
if (IA->getDiscriminator() && DD->getDwarfVersion() >= 4)
addUInt(*ScopeDIE, dwarf::DW_AT_GNU_discriminator, None,
@@ -482,6 +510,7 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
bool Abstract) {
// Define variable debug information entry.
auto VariableDie = DIE::get(DIEValueAllocator, DV.getTag());
+ insertDIE(DV.getVariable(), VariableDie);
if (Abstract) {
applyVariableAttributes(DV, *VariableDie);
@@ -547,8 +576,11 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
Ops.append(Expr->elements_begin(), Expr->elements_end());
DIExpressionCursor Cursor(Ops);
DwarfExpr.setMemoryLocationKind();
- DwarfExpr.addMachineRegExpression(
- *Asm->MF->getSubtarget().getRegisterInfo(), Cursor, FrameReg);
+ if (const MCSymbol *FrameSymbol = Asm->getFunctionFrameSymbol())
+ addOpAddress(*Loc, FrameSymbol);
+ else
+ DwarfExpr.addMachineRegExpression(
+ *Asm->MF->getSubtarget().getRegisterInfo(), Cursor, FrameReg);
DwarfExpr.addExpression(std::move(Cursor));
}
addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
@@ -565,13 +597,95 @@ DIE *DwarfCompileUnit::constructVariableDIE(DbgVariable &DV,
return Var;
}
+/// Return all DIVariables that appear in count: expressions.
+static SmallVector<const DIVariable *, 2> dependencies(DbgVariable *Var) {
+ SmallVector<const DIVariable *, 2> Result;
+ auto *Array = dyn_cast<DICompositeType>(Var->getType());
+ if (!Array || Array->getTag() != dwarf::DW_TAG_array_type)
+ return Result;
+ for (auto *El : Array->getElements()) {
+ if (auto *Subrange = dyn_cast<DISubrange>(El)) {
+ auto Count = Subrange->getCount();
+ if (auto *Dependency = Count.dyn_cast<DIVariable *>())
+ Result.push_back(Dependency);
+ }
+ }
+ return Result;
+}
+
+/// Sort local variables so that variables appearing inside of helper
+/// expressions come first.
+static SmallVector<DbgVariable *, 8>
+sortLocalVars(SmallVectorImpl<DbgVariable *> &Input) {
+ SmallVector<DbgVariable *, 8> Result;
+ SmallVector<PointerIntPair<DbgVariable *, 1>, 8> WorkList;
+ // Map back from a DIVariable to its containing DbgVariable.
+ SmallDenseMap<const DILocalVariable *, DbgVariable *> DbgVar;
+ // Set of DbgVariables in Result.
+ SmallDenseSet<DbgVariable *, 8> Visited;
+ // For cycle detection.
+ SmallDenseSet<DbgVariable *, 8> Visiting;
+
+ // Initialize the worklist and the DIVariable lookup table.
+ for (auto Var : reverse(Input)) {
+ DbgVar.insert({Var->getVariable(), Var});
+ WorkList.push_back({Var, 0});
+ }
+
+ // Perform a stable topological sort by doing a DFS.
+ while (!WorkList.empty()) {
+ auto Item = WorkList.back();
+ DbgVariable *Var = Item.getPointer();
+ bool visitedAllDependencies = Item.getInt();
+ WorkList.pop_back();
+
+ // Dependency is in a different lexical scope or a global.
+ if (!Var)
+ continue;
+
+ // Already handled.
+ if (Visited.count(Var))
+ continue;
+
+ // Add to Result if all dependencies are visited.
+ if (visitedAllDependencies) {
+ Visited.insert(Var);
+ Result.push_back(Var);
+ continue;
+ }
+
+ // Detect cycles.
+ auto Res = Visiting.insert(Var);
+ if (!Res.second) {
+ assert(false && "dependency cycle in local variables");
+ return Result;
+ }
+
+ // Push dependencies and this node onto the worklist, so that this node is
+ // visited again after all of its dependencies are handled.
+ WorkList.push_back({Var, 1});
+ for (auto *Dependency : dependencies(Var)) {
+ auto Dep = dyn_cast_or_null<const DILocalVariable>(Dependency);
+ WorkList.push_back({DbgVar[Dep], 0});
+ }
+ }
+ return Result;
+}
+
DIE *DwarfCompileUnit::createScopeChildrenDIE(LexicalScope *Scope,
SmallVectorImpl<DIE *> &Children,
bool *HasNonScopeChildren) {
assert(Children.empty());
DIE *ObjectPointer = nullptr;
- for (DbgVariable *DV : DU->getScopeVariables().lookup(Scope))
+ // Emit function arguments (order is significant).
+ auto Vars = DU->getScopeVariables().lookup(Scope);
+ for (auto &DV : Vars.Args)
+ Children.push_back(constructVariableDIE(*DV.second, *Scope, ObjectPointer));
+
+ // Emit local variables.
+ auto Locals = sortLocalVars(Vars.Locals);
+ for (DbgVariable *DV : Locals)
Children.push_back(constructVariableDIE(*DV, *Scope, ObjectPointer));
// Skip imported directives in gmlt-like data.
@@ -687,9 +801,7 @@ DIE *DwarfCompileUnit::constructImportedEntityDIE(
else
EntityDie = getDIE(Entity);
assert(EntityDie);
- auto *File = Module->getFile();
- addSourceLine(*IMDie, Module->getLine(), File ? File->getFilename() : "",
- File ? File->getDirectory() : "");
+ addSourceLine(*IMDie, Module->getLine(), Module->getFile());
addDIEEntry(*IMDie, dwarf::DW_AT_import, *EntityDie);
StringRef Name = Module->getName();
if (!Name.empty())
@@ -750,7 +862,7 @@ void DwarfCompileUnit::createAbstractVariable(const DILocalVariable *Var,
void DwarfCompileUnit::emitHeader(bool UseOffsets) {
// Don't bother labeling the .dwo unit, as its offset isn't used.
- if (!Skeleton) {
+ if (!Skeleton && !DD->useSectionsAsReferences()) {
LabelBegin = Asm->createTempSymbol("cu_begin");
Asm->OutStreamer->EmitLabel(LabelBegin);
}
@@ -759,6 +871,8 @@ void DwarfCompileUnit::emitHeader(bool UseOffsets) {
: DD->useSplitDwarf() ? dwarf::DW_UT_skeleton
: dwarf::DW_UT_compile;
DwarfUnit::emitCommonHeader(UseOffsets, UT);
+ if (DD->getDwarfVersion() >= 5 && UT != dwarf::DW_UT_compile)
+ Asm->emitInt64(getDWOId());
}
bool DwarfCompileUnit::hasDwarfPubSections() const {
@@ -767,7 +881,8 @@ bool DwarfCompileUnit::hasDwarfPubSections() const {
if (CUNode->getGnuPubnames())
return true;
- return DD->tuneForGDB() && !includeMinimalInlineScopes();
+ return DD->tuneForGDB() && DD->usePubSections() &&
+ !includeMinimalInlineScopes();
}
/// addGlobalName - Add a new global name to the compile unit.
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 68482eb7e358..51e1558fe4a3 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -83,7 +83,10 @@ class DwarfCompileUnit final : public DwarfUnit {
DenseMap<const MDNode *, DIE *> AbstractSPDies;
DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables;
- /// \brief Construct a DIE for the given DbgVariable without initializing the
+ /// DWO ID for correlating skeleton and split units.
+ uint64_t DWOId = 0;
+
+ /// Construct a DIE for the given DbgVariable without initializing the
/// DbgVariable's DIE reference.
DIE *constructVariableDIEImpl(const DbgVariable &DV, bool Abstract);
@@ -141,7 +144,7 @@ public:
DwarfCompileUnit &getCU() override { return *this; }
- unsigned getOrCreateSourceID(StringRef FileName, StringRef DirName) override;
+ unsigned getOrCreateSourceID(const DIFile *File) override;
void addImportedEntity(const DIImportedEntity* IE) {
DIScope *Scope = IE->getScope();
@@ -159,7 +162,7 @@ public:
void attachLowHighPC(DIE &D, const MCSymbol *Begin, const MCSymbol *End);
- /// \brief Find DIE for the given subprogram and attach appropriate
+ /// Find DIE for the given subprogram and attach appropriate
/// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global
/// variables in this scope then create and insert DIEs for these
/// variables.
@@ -168,7 +171,7 @@ public:
void constructScopeDIE(LexicalScope *Scope,
SmallVectorImpl<DIE *> &FinalChildren);
- /// \brief A helper function to construct a RangeSpanList for a given
+ /// A helper function to construct a RangeSpanList for a given
/// lexical scope.
void addScopeRangeList(DIE &ScopeDIE, SmallVector<RangeSpan, 2> Range);
@@ -177,11 +180,11 @@ public:
void attachRangesOrLowHighPC(DIE &D,
const SmallVectorImpl<InsnRange> &Ranges);
- /// \brief This scope represents inlined body of a function. Construct
+  /// This scope represents the inlined body of a function. Construct a
/// DIE to represent this concrete inlined copy of the function.
DIE *constructInlinedScopeDIE(LexicalScope *Scope);
- /// \brief Construct new DW_TAG_lexical_block for this scope and
+ /// Construct new DW_TAG_lexical_block for this scope and
/// attach DW_AT_low_pc/DW_AT_high_pc labels.
DIE *constructLexicalScopeDIE(LexicalScope *Scope);
@@ -196,14 +199,14 @@ public:
SmallVectorImpl<DIE *> &Children,
bool *HasNonScopeChildren = nullptr);
- /// \brief Construct a DIE for this subprogram scope.
+ /// Construct a DIE for this subprogram scope.
void constructSubprogramScopeDIE(const DISubprogram *Sub, LexicalScope *Scope);
DIE *createAndAddScopeChildren(LexicalScope *Scope, DIE &ScopeDIE);
void constructAbstractSubprogramScopeDIE(LexicalScope *Scope);
- /// \brief Construct import_module DIE.
+ /// Construct import_module DIE.
DIE *constructImportedEntityDIE(const DIImportedEntity *Module);
void finishSubprogramDefinition(const DISubprogram *SP);
@@ -214,11 +217,18 @@ public:
DbgVariable *getExistingAbstractVariable(InlinedVariable IV,
const DILocalVariable *&Cleansed);
DbgVariable *getExistingAbstractVariable(InlinedVariable IV);
- void createAbstractVariable(const DILocalVariable *DV, LexicalScope *Scope);
+ void createAbstractVariable(const DILocalVariable *Var, LexicalScope *Scope);
/// Set the skeleton unit associated with this unit.
void setSkeleton(DwarfCompileUnit &Skel) { Skeleton = &Skel; }
+ unsigned getHeaderSize() const override {
+ // DWARF v5 added the DWO ID to the header for split/skeleton units.
+ unsigned DWOIdSize =
+ DD->getDwarfVersion() >= 5 && DD->useSplitDwarf() ? sizeof(uint64_t)
+ : 0;
+ return DwarfUnit::getHeaderSize() + DWOIdSize;
+ }
unsigned getLength() {
return sizeof(uint32_t) + // Length field
getHeaderSize() + getUnitDie().getSize();
@@ -290,6 +300,9 @@ public:
void setBaseAddress(const MCSymbol *Base) { BaseAddress = Base; }
const MCSymbol *getBaseAddress() const { return BaseAddress; }
+ uint64_t getDWOId() const { return DWOId; }
+ void setDWOId(uint64_t DwoId) { DWOId = DwoId; }
+
bool hasDwarfPubSections() const;
};
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 2c9c7d4f3146..8761fae9dd22 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -16,7 +16,6 @@
#include "DIEHash.h"
#include "DebugLocEntry.h"
#include "DebugLocStream.h"
-#include "DwarfAccelTable.h"
#include "DwarfCompileUnit.h"
#include "DwarfExpression.h"
#include "DwarfFile.h"
@@ -31,6 +30,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/AccelTable.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/DIE.h"
#include "llvm/CodeGen/LexicalScopes.h"
@@ -39,7 +39,6 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/TargetLoweringObjectFile.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Constants.h"
@@ -66,6 +65,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
@@ -94,6 +94,11 @@ static cl::opt<bool> GenerateARangeSection("generate-arange-section",
cl::desc("Generate dwarf aranges"),
cl::init(false));
+static cl::opt<bool>
+ GenerateDwarfTypeUnits("generate-type-units", cl::Hidden,
+ cl::desc("Generate DWARF4 type units."),
+ cl::init(false));
+
static cl::opt<bool> SplitDwarfCrossCuReferences(
"split-dwarf-cross-cu-references", cl::Hidden,
cl::desc("Enable cross-cu references in DWO files"), cl::init(false));
@@ -107,14 +112,40 @@ static cl::opt<DefaultOnOff> UnknownLocations(
clEnumVal(Enable, "In all cases"), clEnumVal(Disable, "Never")),
cl::init(Default));
+static cl::opt<AccelTableKind> AccelTables(
+ "accel-tables", cl::Hidden, cl::desc("Output dwarf accelerator tables."),
+ cl::values(clEnumValN(AccelTableKind::Default, "Default",
+ "Default for platform"),
+ clEnumValN(AccelTableKind::None, "Disable", "Disabled."),
+ clEnumValN(AccelTableKind::Apple, "Apple", "Apple"),
+ clEnumValN(AccelTableKind::Dwarf, "Dwarf", "DWARF")),
+ cl::init(AccelTableKind::Default));
+
static cl::opt<DefaultOnOff>
-DwarfAccelTables("dwarf-accel-tables", cl::Hidden,
- cl::desc("Output prototype dwarf accelerator tables."),
+DwarfInlinedStrings("dwarf-inlined-strings", cl::Hidden,
+ cl::desc("Use inlined strings rather than string section."),
cl::values(clEnumVal(Default, "Default for platform"),
clEnumVal(Enable, "Enabled"),
clEnumVal(Disable, "Disabled")),
cl::init(Default));
+static cl::opt<bool>
+ NoDwarfPubSections("no-dwarf-pub-sections", cl::Hidden,
+ cl::desc("Disable emission of DWARF pub sections."),
+ cl::init(false));
+
+static cl::opt<bool>
+ NoDwarfRangesSection("no-dwarf-ranges-section", cl::Hidden,
+                         cl::desc("Disable emission of the .debug_ranges section."),
+ cl::init(false));
+
+static cl::opt<DefaultOnOff> DwarfSectionsAsReferences(
+ "dwarf-sections-as-references", cl::Hidden,
+ cl::desc("Use sections+offset as references rather than labels."),
+ cl::values(clEnumVal(Default, "Default for platform"),
+ clEnumVal(Enable, "Enabled"), clEnumVal(Disable, "Disabled")),
+ cl::init(Default));
+
enum LinkageNameOption {
DefaultLinkageNames,
AllLinkageNames,
@@ -215,11 +246,11 @@ ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const {
return A.Expr->isFragment();
}) &&
"multiple FI expressions without DW_OP_LLVM_fragment");
- std::sort(FrameIndexExprs.begin(), FrameIndexExprs.end(),
- [](const FrameIndexExpr &A, const FrameIndexExpr &B) -> bool {
- return A.Expr->getFragmentInfo()->OffsetInBits <
- B.Expr->getFragmentInfo()->OffsetInBits;
- });
+ llvm::sort(FrameIndexExprs.begin(), FrameIndexExprs.end(),
+ [](const FrameIndexExpr &A, const FrameIndexExpr &B) -> bool {
+ return A.Expr->getFragmentInfo()->OffsetInBits <
+ B.Expr->getFragmentInfo()->OffsetInBits;
+ });
return FrameIndexExprs;
}
@@ -258,23 +289,34 @@ void DbgVariable::addMMIEntry(const DbgVariable &V) {
"conflicting locations for variable");
}
-static const DwarfAccelTable::Atom TypeAtoms[] = {
- DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
- DwarfAccelTable::Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
- DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)};
+static AccelTableKind computeAccelTableKind(unsigned DwarfVersion,
+ bool GenerateTypeUnits,
+ DebuggerKind Tuning,
+ const Triple &TT) {
+ // Honor an explicit request.
+ if (AccelTables != AccelTableKind::Default)
+ return AccelTables;
+
+ // Accelerator tables with type units are currently not supported.
+ if (GenerateTypeUnits)
+ return AccelTableKind::None;
+
+  // Accelerator tables get emitted if targeting DWARF v5 or LLDB. DWARF v5
+  // always implies debug_names. For lower standard versions we use Apple
+  // accelerator tables on Apple platforms and debug_names elsewhere.
+ if (DwarfVersion >= 5)
+ return AccelTableKind::Dwarf;
+ if (Tuning == DebuggerKind::LLDB)
+ return TT.isOSBinFormatMachO() ? AccelTableKind::Apple
+ : AccelTableKind::Dwarf;
+ return AccelTableKind::None;
+}
DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
: DebugHandlerBase(A), DebugLocs(A->OutStreamer->isVerboseAsm()),
InfoHolder(A, "info_string", DIEValueAllocator),
SkeletonHolder(A, "skel_string", DIEValueAllocator),
- IsDarwin(A->TM.getTargetTriple().isOSDarwin()),
- AccelNames(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
- dwarf::DW_FORM_data4)),
- AccelObjC(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
- dwarf::DW_FORM_data4)),
- AccelNamespace(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
- dwarf::DW_FORM_data4)),
- AccelTypes(TypeAtoms) {
+ IsDarwin(A->TM.getTargetTriple().isOSDarwin()) {
const Triple &TT = Asm->TM.getTargetTriple();
// Make sure we know our "debugger tuning." The target option takes
@@ -288,11 +330,12 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
else
DebuggerTuning = DebuggerKind::GDB;
- // Turn on accelerator tables for LLDB by default.
- if (DwarfAccelTables == Default)
- HasDwarfAccelTables = tuneForLLDB();
+ if (DwarfInlinedStrings == Default)
+ UseInlineStrings = TT.isNVPTX();
else
- HasDwarfAccelTables = DwarfAccelTables == Enable;
+ UseInlineStrings = DwarfInlinedStrings == Enable;
+
+ UseLocSection = !TT.isNVPTX();
HasAppleExtensionAttributes = tuneForLLDB();
@@ -308,8 +351,23 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
unsigned DwarfVersionNumber = Asm->TM.Options.MCOptions.DwarfVersion;
unsigned DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber
: MMI->getModule()->getDwarfVersion();
- // Use dwarf 4 by default if nothing is requested.
- DwarfVersion = DwarfVersion ? DwarfVersion : dwarf::DWARF_VERSION;
+ // Use dwarf 4 by default if nothing is requested. For NVPTX, use dwarf 2.
+ DwarfVersion =
+ TT.isNVPTX() ? 2 : (DwarfVersion ? DwarfVersion : dwarf::DWARF_VERSION);
+
+ UsePubSections = !NoDwarfPubSections && !TT.isNVPTX();
+ UseRangesSection = !NoDwarfRangesSection && !TT.isNVPTX();
+
+ // Use sections as references. Force for NVPTX.
+ if (DwarfSectionsAsReferences == Default)
+ UseSectionsAsReferences = TT.isNVPTX();
+ else
+ UseSectionsAsReferences = DwarfSectionsAsReferences == Enable;
+
+ GenerateTypeUnits = GenerateDwarfTypeUnits;
+
+ TheAccelTableKind = computeAccelTableKind(
+ DwarfVersion, GenerateTypeUnits, DebuggerTuning, A->TM.getTargetTriple());
// Work around a GDB bug. GDB doesn't support the standard opcode;
// SCE doesn't support GNU's; LLDB prefers the standard opcode, which
@@ -321,6 +379,12 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
// GDB does not fully support the DWARF 4 representation for bitfields.
UseDWARF2Bitfields = (DwarfVersion < 4) || tuneForGDB();
+ // The DWARF v5 string offsets table has - possibly shared - contributions
+  // from each compile and type unit, each preceded by a header. The string
+ // offsets table used by the pre-DWARF v5 split-DWARF implementation uses
+ // a monolithic string offsets table without any header.
+ UseSegmentedStringOffsetsTable = DwarfVersion >= 5;
+
Asm->OutStreamer->getContext().setDwarfVersion(DwarfVersion);
}
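
The new comment above about the segmented string offsets table corresponds to the on-disk shape sketched below. This is illustrative only and not generated by this patch: DWARF32 and little-endian are assumed, the offsets are invented, and only the DWARF v5 form is shown; the pre-v5 split-DWARF table is the same offset array with no header at all. DW_AT_str_offsets_base points just past the header, at the first offset.

#include <cstdint>
#include <vector>

static void emitU16(std::vector<uint8_t> &Out, uint16_t V) {
  Out.push_back(uint8_t(V));
  Out.push_back(uint8_t(V >> 8));
}

static void emitU32(std::vector<uint8_t> &Out, uint32_t V) {
  for (int I = 0; I < 4; ++I)
    Out.push_back(uint8_t(V >> (8 * I)));
}

// One DWARF v5 (DWARF32) contribution to .debug_str_offsets for a unit
// that references StrOffsets.size() strings.
static std::vector<uint8_t>
contribution(const std::vector<uint32_t> &StrOffsets) {
  std::vector<uint8_t> Out;
  // unit_length covers everything after this field: 2 bytes of version,
  // 2 bytes of padding, then 4 bytes per offset.
  emitU32(Out, uint32_t(4 + 4 * StrOffsets.size()));
  emitU16(Out, 5); // version
  emitU16(Out, 0); // padding
  for (uint32_t Off : StrOffsets) // offsets into .debug_str
    emitU32(Out, Off);
  return Out;
}

int main() {
  // 8-byte header plus three 4-byte offsets.
  return contribution({0x0, 0x15, 0x2a}).size() == 20 ? 0 : 1;
}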
@@ -355,17 +419,18 @@ static StringRef getObjCMethodName(StringRef In) {
}
// Add the various names to the Dwarf accelerator table names.
-// TODO: Determine whether or not we should add names for programs
-// that do not have a DW_AT_name or DW_AT_linkage_name field - this
-// is only slightly different than the lookup of non-standard ObjC names.
void DwarfDebug::addSubprogramNames(const DISubprogram *SP, DIE &Die) {
if (!SP->isDefinition())
return;
- addAccelName(SP->getName(), Die);
- // If the linkage name is different than the name, go ahead and output
- // that as well into the name table.
- if (SP->getLinkageName() != "" && SP->getName() != SP->getLinkageName())
+ if (SP->getName() != "")
+ addAccelName(SP->getName(), Die);
+
+ // If the linkage name is different than the name, go ahead and output that as
+ // well into the name table. Only do that if we are going to actually emit
+ // that name.
+ if (SP->getLinkageName() != "" && SP->getName() != SP->getLinkageName() &&
+ (useAllLinkageNames() || InfoHolder.getAbstractSPDies().lookup(SP)))
addAccelName(SP->getLinkageName(), Die);
// If this is an Objective-C selector name add it to the ObjC accelerator
@@ -471,8 +536,9 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
// explicitly describe the directory of all files, never relying on the
// compilation directory.
if (!Asm->OutStreamer->hasRawTextSupport() || SingleCU)
- Asm->OutStreamer->getContext().setMCLineTableCompilationDir(
- NewCU.getUniqueID(), CompilationDir);
+ Asm->OutStreamer->emitDwarfFile0Directive(
+ CompilationDir, FN, NewCU.getMD5AsBytes(DIUnit->getFile()),
+ DIUnit->getSource(), NewCU.getUniqueID());
StringRef Producer = DIUnit->getProducer();
StringRef Flags = DIUnit->getFlags();
@@ -486,6 +552,10 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
DIUnit->getSourceLanguage());
NewCU.addString(Die, dwarf::DW_AT_name, FN);
+  // Add DW_AT_str_offsets_base to the unit DIE, except for split units.
+ if (useSegmentedStringOffsetsTable() && !useSplitDwarf())
+ NewCU.addStringOffsetsStart();
+
if (!useSplitDwarf()) {
NewCU.initStmtList();
@@ -541,21 +611,22 @@ void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,
/// Sort and unique GVEs by comparing their fragment offset.
static SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &
sortGlobalExprs(SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &GVEs) {
- std::sort(GVEs.begin(), GVEs.end(),
- [](DwarfCompileUnit::GlobalExpr A, DwarfCompileUnit::GlobalExpr B) {
- // Sort order: first null exprs, then exprs without fragment
- // info, then sort by fragment offset in bits.
- // FIXME: Come up with a more comprehensive comparator so
- // the sorting isn't non-deterministic, and so the following
- // std::unique call works correctly.
- if (!A.Expr || !B.Expr)
- return !!B.Expr;
- auto FragmentA = A.Expr->getFragmentInfo();
- auto FragmentB = B.Expr->getFragmentInfo();
- if (!FragmentA || !FragmentB)
- return !!FragmentB;
- return FragmentA->OffsetInBits < FragmentB->OffsetInBits;
- });
+ llvm::sort(GVEs.begin(), GVEs.end(),
+ [](DwarfCompileUnit::GlobalExpr A,
+ DwarfCompileUnit::GlobalExpr B) {
+ // Sort order: first null exprs, then exprs without fragment
+ // info, then sort by fragment offset in bits.
+ // FIXME: Come up with a more comprehensive comparator so
+ // the sorting isn't non-deterministic, and so the following
+ // std::unique call works correctly.
+ if (!A.Expr || !B.Expr)
+ return !!B.Expr;
+ auto FragmentA = A.Expr->getFragmentInfo();
+ auto FragmentB = B.Expr->getFragmentInfo();
+ if (!FragmentA || !FragmentB)
+ return !!FragmentB;
+ return FragmentA->OffsetInBits < FragmentB->OffsetInBits;
+ });
GVEs.erase(std::unique(GVEs.begin(), GVEs.end(),
[](DwarfCompileUnit::GlobalExpr A,
DwarfCompileUnit::GlobalExpr B) {
@@ -590,6 +661,19 @@ void DwarfDebug::beginModule() {
GVMap[GVE->getVariable()].push_back({&Global, GVE->getExpression()});
}
+ // Create the symbol that designates the start of the unit's contribution
+ // to the string offsets table. In a split DWARF scenario, only the skeleton
+ // unit has the DW_AT_str_offsets_base attribute (and hence needs the symbol).
+ if (useSegmentedStringOffsetsTable())
+ (useSplitDwarf() ? SkeletonHolder : InfoHolder)
+ .setStringOffsetsStartSym(Asm->createTempSymbol("str_offsets_base"));
+
+ // Create the symbol that designates the start of the DWARF v5 range list
+ // table. It is located past the header and before the offsets table.
+ if (getDwarfVersion() >= 5)
+ (useSplitDwarf() ? SkeletonHolder : InfoHolder)
+ .setRnglistsTableBaseSym(Asm->createTempSymbol("rnglists_table_base"));
+
for (DICompileUnit *CUNode : M->debug_compile_units()) {
// FIXME: Move local imported entities into a list attached to the
// subprogram, then this search won't be needed and a
@@ -694,11 +778,15 @@ void DwarfDebug::finalizeModuleInfo() {
// Emit a unique identifier for this CU.
uint64_t ID =
DIEHash(Asm).computeCUSignature(DWOName, TheCU.getUnitDie());
- TheCU.addUInt(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
- dwarf::DW_FORM_data8, ID);
- SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
- dwarf::DW_FORM_data8, ID);
-
+ if (getDwarfVersion() >= 5) {
+ TheCU.setDWOId(ID);
+ SkCU->setDWOId(ID);
+ } else {
+ TheCU.addUInt(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
+ dwarf::DW_FORM_data8, ID);
+ SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
+ dwarf::DW_FORM_data8, ID);
+ }
// We don't keep track of which addresses are used in which CU so this
// is a bit pessimistic under LTO.
if (!AddrPool.isEmpty()) {
@@ -706,7 +794,7 @@ void DwarfDebug::finalizeModuleInfo() {
SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_addr_base,
Sym, Sym);
}
- if (!SkCU->getRangeLists().empty()) {
+ if (getDwarfVersion() < 5 && !SkCU->getRangeLists().empty()) {
const MCSymbol *Sym = TLOF.getDwarfRangesSection()->getBeginSymbol();
SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_ranges_base,
Sym, Sym);
@@ -721,7 +809,7 @@ void DwarfDebug::finalizeModuleInfo() {
// ranges for all subprogram DIEs for mach-o.
DwarfCompileUnit &U = SkCU ? *SkCU : TheCU;
if (unsigned NumRanges = TheCU.getRanges().size()) {
- if (NumRanges > 1)
+ if (NumRanges > 1 && useRangesSection())
// A DW_AT_low_pc attribute may also be specified in combination with
// DW_AT_ranges to specify the default base address for use in
// location lists (see Section 2.6.2) and range lists (see Section
@@ -732,6 +820,10 @@ void DwarfDebug::finalizeModuleInfo() {
U.attachRangesOrLowHighPC(U.getUnitDie(), TheCU.takeRanges());
}
+ if (getDwarfVersion() >= 5 && !useSplitDwarf() &&
+ !U.getRangeLists().empty())
+ U.addRnglistsBase();
+
auto *CUNode = cast<DICompileUnit>(P.first);
// If compile Unit has macros, emit "DW_AT_macro_info" attribute.
if (CUNode->getMacros())
@@ -799,11 +891,20 @@ void DwarfDebug::endModule() {
}
// Emit info into the dwarf accelerator table sections.
- if (useDwarfAccelTables()) {
+ switch (getAccelTableKind()) {
+ case AccelTableKind::Apple:
emitAccelNames();
emitAccelObjC();
emitAccelNamespaces();
emitAccelTypes();
+ break;
+ case AccelTableKind::Dwarf:
+ emitAccelDebugNames();
+ break;
+ case AccelTableKind::None:
+ break;
+ case AccelTableKind::Default:
+ llvm_unreachable("Default should have already been resolved.");
}
// Emit the pubnames and pubtypes sections if requested.
@@ -887,7 +988,7 @@ static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) {
llvm_unreachable("Unexpected 4-operand DBG_VALUE instruction!");
}
-/// \brief If this and Next are describing different fragments of the same
+/// If this and Next are describing different fragments of the same
/// variable, merge them by appending Next's values to the current
/// list of values.
/// Return true if the merge was successful.
@@ -903,8 +1004,7 @@ bool DebugLocEntry::MergeValues(const DebugLocEntry &Next) {
// sorted.
for (unsigned i = 0, j = 0; i < Values.size(); ++i) {
for (; j < Next.Values.size(); ++j) {
- int res = DebugHandlerBase::fragmentCmp(
- cast<DIExpression>(Values[i].Expression),
+ int res = cast<DIExpression>(Values[i].Expression)->fragmentCmp(
cast<DIExpression>(Next.Values[j].Expression));
if (res == 0) // The two expressions overlap, we can't merge.
return false;
@@ -967,7 +1067,7 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
// If this fragment overlaps with any open ranges, truncate them.
const DIExpression *DIExpr = Begin->getDebugExpression();
auto Last = remove_if(OpenRanges, [&](DebugLocEntry::Value R) {
- return fragmentsOverlap(DIExpr, R.getExpression());
+ return DIExpr->fragmentsOverlap(R.getExpression());
});
OpenRanges.erase(Last, OpenRanges.end());
@@ -983,7 +1083,7 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
EndLabel = getLabelBeforeInsn(std::next(I)->first);
assert(EndLabel && "Forgot label after instruction ending a range!");
- DEBUG(dbgs() << "DotDebugLoc: " << *Begin << "\n");
+ LLVM_DEBUG(dbgs() << "DotDebugLoc: " << *Begin << "\n");
auto Value = getDebugLocValue(Begin);
DebugLocEntry Loc(StartLabel, EndLabel, Value);
@@ -1012,7 +1112,7 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
// Attempt to coalesce the ranges of two otherwise identical
// DebugLocEntries.
auto CurEntry = DebugLoc.rbegin();
- DEBUG({
+ LLVM_DEBUG({
dbgs() << CurEntry->getValues().size() << " Values:\n";
for (auto &Value : CurEntry->getValues())
Value.dump();
@@ -1131,6 +1231,9 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU,
RegVar->initializeDbgValue(MInsn);
continue;
}
+    // Do not emit location lists if the .debug_loc section is disabled.
+ if (!useLocSection())
+ continue;
// Handle multiple DBG_VALUE instructions describing one variable.
DebugLocStream::ListBuilder List(DebugLocs, TheCU, *Asm, *RegVar, *MInsn);
@@ -1151,10 +1254,12 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU,
}
// Collect info for variables that were optimized out.
- for (const DILocalVariable *DV : SP->getVariables()) {
- if (Processed.insert(InlinedVariable(DV, nullptr)).second)
- if (LexicalScope *Scope = LScopes.findLexicalScope(DV->getScope()))
- createConcreteVariable(TheCU, *Scope, InlinedVariable(DV, nullptr));
+ for (const DINode *DN : SP->getRetainedNodes()) {
+ if (auto *DV = dyn_cast<DILocalVariable>(DN)) {
+ if (Processed.insert(InlinedVariable(DV, nullptr)).second)
+ if (LexicalScope *Scope = LScopes.findLexicalScope(DV->getScope()))
+ createConcreteVariable(TheCU, *Scope, InlinedVariable(DV, nullptr));
+ }
}
}
@@ -1168,7 +1273,9 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
return;
// Check if source location changes, but ignore DBG_VALUE and CFI locations.
- if (MI->isMetaInstruction())
+ // If the instruction is part of the function frame setup code, do not emit
+ // any line record, as there is no correspondence with any user code.
+ if (MI->isMetaInstruction() || MI->getFlag(MachineInstr::FrameSetup))
return;
const DebugLoc &DL = MI->getDebugLoc();
// When we emit a line-0 record, we don't update PrevInstLoc; so look at
@@ -1333,14 +1440,16 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
// Construct abstract scopes.
for (LexicalScope *AScope : LScopes.getAbstractScopesList()) {
auto *SP = cast<DISubprogram>(AScope->getScopeNode());
- // Collect info for variables that were optimized out.
- for (const DILocalVariable *DV : SP->getVariables()) {
- if (!ProcessedVars.insert(InlinedVariable(DV, nullptr)).second)
- continue;
- ensureAbstractVariableIsCreated(TheCU, InlinedVariable(DV, nullptr),
- DV->getScope());
- assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes
- && "ensureAbstractVariableIsCreated inserted abstract scopes");
+ for (const DINode *DN : SP->getRetainedNodes()) {
+ if (auto *DV = dyn_cast<DILocalVariable>(DN)) {
+ // Collect info for variables that were optimized out.
+ if (!ProcessedVars.insert(InlinedVariable(DV, nullptr)).second)
+ continue;
+ ensureAbstractVariableIsCreated(TheCU, InlinedVariable(DV, nullptr),
+ DV->getScope());
+ assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes
+ && "ensureAbstractVariableIsCreated inserted abstract scopes");
+ }
}
constructAbstractSubprogramScopeDIE(TheCU, AScope);
}
@@ -1366,21 +1475,19 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
unsigned Flags) {
StringRef Fn;
- StringRef Dir;
- unsigned Src = 1;
+ unsigned FileNo = 1;
unsigned Discriminator = 0;
if (auto *Scope = cast_or_null<DIScope>(S)) {
Fn = Scope->getFilename();
- Dir = Scope->getDirectory();
if (Line != 0 && getDwarfVersion() >= 4)
if (auto *LBF = dyn_cast<DILexicalBlockFile>(Scope))
Discriminator = LBF->getDiscriminator();
unsigned CUID = Asm->OutStreamer->getContext().getDwarfCompileUnitID();
- Src = static_cast<DwarfCompileUnit &>(*InfoHolder.getUnits()[CUID])
- .getOrCreateSourceID(Fn, Dir);
+ FileNo = static_cast<DwarfCompileUnit &>(*InfoHolder.getUnits()[CUID])
+ .getOrCreateSourceID(Scope->getFile());
}
- Asm->OutStreamer->EmitDwarfLocDirective(Src, Line, Col, Flags, 0,
+ Asm->OutStreamer->EmitDwarfLocDirective(FileNo, Line, Col, Flags, 0,
Discriminator, Fn);
}
@@ -1401,13 +1508,30 @@ void DwarfDebug::emitAbbreviations() {
Holder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection());
}
-void DwarfDebug::emitAccel(DwarfAccelTable &Accel, MCSection *Section,
+void DwarfDebug::emitStringOffsetsTableHeader() {
+ DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+ Holder.getStringPool().emitStringOffsetsTableHeader(
+ *Asm, Asm->getObjFileLowering().getDwarfStrOffSection(),
+ Holder.getStringOffsetsStartSym());
+}
+
+template <typename AccelTableT>
+void DwarfDebug::emitAccel(AccelTableT &Accel, MCSection *Section,
StringRef TableName) {
- Accel.FinalizeTable(Asm, TableName);
Asm->OutStreamer->SwitchSection(Section);
// Emit the full data.
- Accel.emit(Asm, Section->getBeginSymbol(), this);
+ emitAppleAccelTable(Asm, Accel, TableName, Section->getBeginSymbol());
+}
+
+void DwarfDebug::emitAccelDebugNames() {
+ // Don't emit anything if we have no compilation units to index.
+ if (getUnits().empty())
+ return;
+
+ Asm->OutStreamer->SwitchSection(
+ Asm->getObjFileLowering().getDwarfDebugNamesSection());
+ emitDWARF5AccelTable(Asm, AccelDebugNames, *this, getUnits());
}
// Emit visible names into a hashed accelerator table section.
@@ -1525,6 +1649,14 @@ void DwarfDebug::emitDebugPubSections() {
}
}
+void DwarfDebug::emitSectionReference(const DwarfCompileUnit &CU) {
+ if (useSectionsAsReferences())
+ Asm->EmitDwarfOffset(CU.getSection()->getBeginSymbol(),
+ CU.getDebugSectionOffset());
+ else
+ Asm->emitDwarfSymbolReference(CU.getLabelBegin());
+}
+
void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name,
DwarfCompileUnit *TheU,
const StringMap<const DIE *> &Globals) {
@@ -1540,13 +1672,13 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name,
Asm->OutStreamer->EmitLabel(BeginLabel);
Asm->OutStreamer->AddComment("DWARF Version");
- Asm->EmitInt16(dwarf::DW_PUBNAMES_VERSION);
+ Asm->emitInt16(dwarf::DW_PUBNAMES_VERSION);
Asm->OutStreamer->AddComment("Offset of Compilation Unit Info");
- Asm->emitDwarfSymbolReference(TheU->getLabelBegin());
+ emitSectionReference(*TheU);
Asm->OutStreamer->AddComment("Compilation Unit Length");
- Asm->EmitInt32(TheU->getLength());
+ Asm->emitInt32(TheU->getLength());
// Emit the pubnames for this compilation unit.
for (const auto &GI : Globals) {
@@ -1554,14 +1686,14 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name,
const DIE *Entity = GI.second;
Asm->OutStreamer->AddComment("DIE offset");
- Asm->EmitInt32(Entity->getOffset());
+ Asm->emitInt32(Entity->getOffset());
if (GnuStyle) {
dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheU, Entity);
Asm->OutStreamer->AddComment(
Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " +
dwarf::GDBIndexEntryLinkageString(Desc.Linkage));
- Asm->EmitInt8(Desc.toBits());
+ Asm->emitInt8(Desc.toBits());
}
Asm->OutStreamer->AddComment("External Name");
@@ -1569,14 +1701,20 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name,
}
Asm->OutStreamer->AddComment("End Mark");
- Asm->EmitInt32(0);
+ Asm->emitInt32(0);
Asm->OutStreamer->EmitLabel(EndLabel);
}
/// Emit null-terminated strings into a debug str section.
void DwarfDebug::emitDebugStr() {
+ MCSection *StringOffsetsSection = nullptr;
+ if (useSegmentedStringOffsetsTable()) {
+ emitStringOffsetsTableHeader();
+ StringOffsetsSection = Asm->getObjFileLowering().getDwarfStrOffSection();
+ }
DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
- Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection());
+ Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection(),
+ StringOffsetsSection, /* UseRelativeOffsets = */ true);
}
void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
@@ -1589,7 +1727,6 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
}
static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
- ByteStreamer &Streamer,
const DebugLocEntry::Value &Value,
DwarfExpression &DwarfExpr) {
auto *DIExpr = Value.getExpression();
@@ -1634,11 +1771,11 @@ void DebugLocEntry::finalize(const AsmPrinter &AP,
"fragments are expected to be sorted");
for (auto Fragment : Values)
- emitDebugLocValue(AP, BT, Streamer, Fragment, DwarfExpr);
+ emitDebugLocValue(AP, BT, Fragment, DwarfExpr);
} else {
assert(Values.size() == 1 && "only fragments may have >1 value");
- emitDebugLocValue(AP, BT, Streamer, Value, DwarfExpr);
+ emitDebugLocValue(AP, BT, Value, DwarfExpr);
}
DwarfExpr.finalize();
}
@@ -1646,7 +1783,7 @@ void DebugLocEntry::finalize(const AsmPrinter &AP,
void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry) {
// Emit the size.
Asm->OutStreamer->AddComment("Loc expr size");
- Asm->EmitInt16(DebugLocs.getBytes(Entry).size());
+ Asm->emitInt16(DebugLocs.getBytes(Entry).size());
// Emit the entry.
APByteStreamer Streamer(*Asm);
@@ -1694,14 +1831,14 @@ void DwarfDebug::emitDebugLocDWO() {
// rather than two. We could get fancier and try to, say, reuse an
// address we know we've emitted elsewhere (the start of the function?
// The start of the CU or CU subrange that encloses this range?)
- Asm->EmitInt8(dwarf::DW_LLE_startx_length);
+ Asm->emitInt8(dwarf::DW_LLE_startx_length);
unsigned idx = AddrPool.getIndex(Entry.BeginSym);
Asm->EmitULEB128(idx);
Asm->EmitLabelDifference(Entry.EndSym, Entry.BeginSym, 4);
emitDebugLocEntryLocation(Entry);
}
- Asm->EmitInt8(dwarf::DW_LLE_end_of_list);
+ Asm->emitInt8(dwarf::DW_LLE_end_of_list);
}
}
@@ -1752,7 +1889,7 @@ void DwarfDebug::emitDebugARanges() {
}
// Sort the symbols by offset within the section.
- std::sort(
+ std::stable_sort(
List.begin(), List.end(), [&](const SymbolCU &A, const SymbolCU &B) {
unsigned IA = A.Sym ? Asm->OutStreamer->GetSymbolOrder(A.Sym) : 0;
unsigned IB = B.Sym ? Asm->OutStreamer->GetSymbolOrder(B.Sym) : 0;
@@ -1801,10 +1938,10 @@ void DwarfDebug::emitDebugARanges() {
}
// Sort the CU list (again, to ensure consistent output order).
- std::sort(CUs.begin(), CUs.end(),
- [](const DwarfCompileUnit *A, const DwarfCompileUnit *B) {
- return A->getUniqueID() < B->getUniqueID();
- });
+ llvm::sort(CUs.begin(), CUs.end(),
+ [](const DwarfCompileUnit *A, const DwarfCompileUnit *B) {
+ return A->getUniqueID() < B->getUniqueID();
+ });
// Emit an arange table for each CU we used.
for (DwarfCompileUnit *CU : CUs) {
@@ -1832,15 +1969,15 @@ void DwarfDebug::emitDebugARanges() {
// For each compile unit, write the list of spans it covers.
Asm->OutStreamer->AddComment("Length of ARange Set");
- Asm->EmitInt32(ContentSize);
+ Asm->emitInt32(ContentSize);
Asm->OutStreamer->AddComment("DWARF Arange version number");
- Asm->EmitInt16(dwarf::DW_ARANGES_VERSION);
+ Asm->emitInt16(dwarf::DW_ARANGES_VERSION);
Asm->OutStreamer->AddComment("Offset Into Debug Info Section");
- Asm->emitDwarfSymbolReference(CU->getLabelBegin());
+ emitSectionReference(*CU);
Asm->OutStreamer->AddComment("Address Size (in bytes)");
- Asm->EmitInt8(PtrSize);
+ Asm->emitInt8(PtrSize);
Asm->OutStreamer->AddComment("Segment Size (in bytes)");
- Asm->EmitInt8(0);
+ Asm->emitInt8(0);
Asm->OutStreamer->emitFill(Padding, 0xff);
@@ -1867,17 +2004,151 @@ void DwarfDebug::emitDebugARanges() {
}
}
-/// Emit address ranges into a debug ranges section.
+/// Emit a single range list. We handle both DWARF v5 and earlier.
+static void emitRangeList(AsmPrinter *Asm, DwarfCompileUnit *CU,
+ const RangeSpanList &List) {
+
+ auto DwarfVersion = CU->getDwarfVersion();
+ // Emit our symbol so we can find the beginning of the range.
+ Asm->OutStreamer->EmitLabel(List.getSym());
+ // Gather all the ranges that apply to the same section so they can share
+ // a base address entry.
+ MapVector<const MCSection *, std::vector<const RangeSpan *>> SectionRanges;
+ // Size for our labels.
+ auto Size = Asm->MAI->getCodePointerSize();
+
+ for (const RangeSpan &Range : List.getRanges())
+ SectionRanges[&Range.getStart()->getSection()].push_back(&Range);
+
+ auto *CUBase = CU->getBaseAddress();
+ bool BaseIsSet = false;
+ for (const auto &P : SectionRanges) {
+ // Don't bother with a base address entry if there's only one range in
+ // this section in this range list - for example ranges for a CU will
+ // usually consist of single regions from each of many sections
+ // (-ffunction-sections, or just C++ inline functions) except under LTO
+ // or optnone where there may be holes in a single CU's section
+ // contributions.
+ auto *Base = CUBase;
+ if (!Base && P.second.size() > 1 &&
+ (UseDwarfRangesBaseAddressSpecifier || DwarfVersion >= 5)) {
+ BaseIsSet = true;
+ // FIXME/use care: This may not be a useful base address if it's not
+ // the lowest address/range in this object.
+ Base = P.second.front()->getStart();
+ if (DwarfVersion >= 5) {
+ Asm->OutStreamer->AddComment("DW_RLE_base_address");
+ Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_base_address, 1);
+ } else
+ Asm->OutStreamer->EmitIntValue(-1, Size);
+ Asm->OutStreamer->AddComment(" base address");
+ Asm->OutStreamer->EmitSymbolValue(Base, Size);
+ } else if (BaseIsSet && DwarfVersion < 5) {
+ BaseIsSet = false;
+ assert(!Base);
+ Asm->OutStreamer->EmitIntValue(-1, Size);
+ Asm->OutStreamer->EmitIntValue(0, Size);
+ }
+
+ for (const auto *RS : P.second) {
+ const MCSymbol *Begin = RS->getStart();
+ const MCSymbol *End = RS->getEnd();
+ assert(Begin && "Range without a begin symbol?");
+ assert(End && "Range without an end symbol?");
+ if (Base) {
+ if (DwarfVersion >= 5) {
+ // Emit DW_RLE_offset_pair when we have a base.
+ Asm->OutStreamer->AddComment("DW_RLE_offset_pair");
+ Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_offset_pair, 1);
+ Asm->OutStreamer->AddComment(" starting offset");
+ Asm->EmitLabelDifferenceAsULEB128(Begin, Base);
+ Asm->OutStreamer->AddComment(" ending offset");
+ Asm->EmitLabelDifferenceAsULEB128(End, Base);
+ } else {
+ Asm->EmitLabelDifference(Begin, Base, Size);
+ Asm->EmitLabelDifference(End, Base, Size);
+ }
+ } else if (DwarfVersion >= 5) {
+ Asm->OutStreamer->AddComment("DW_RLE_start_length");
+ Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_start_length, 1);
+ Asm->OutStreamer->AddComment(" start");
+ Asm->OutStreamer->EmitSymbolValue(Begin, Size);
+ Asm->OutStreamer->AddComment(" length");
+ Asm->EmitLabelDifferenceAsULEB128(End, Begin);
+ } else {
+ Asm->OutStreamer->EmitSymbolValue(Begin, Size);
+ Asm->OutStreamer->EmitSymbolValue(End, Size);
+ }
+ }
+ }
+ if (DwarfVersion >= 5) {
+ Asm->OutStreamer->AddComment("DW_RLE_end_of_list");
+ Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_end_of_list, 1);
+ } else {
+ // Terminate the list with two 0 values.
+ Asm->OutStreamer->EmitIntValue(0, Size);
+ Asm->OutStreamer->EmitIntValue(0, Size);
+ }
+}
+
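For reference, a minimal standalone sketch (not part of the patch) of the byte stream the shared-base path in emitRangeList produces, assuming a little-endian target with 8-byte addresses and DWARF v5; the helper names are hypothetical, and the hard-coded opcodes (5 = DW_RLE_base_address, 4 = DW_RLE_offset_pair, 0 = DW_RLE_end_of_list) are the DWARF v5 constants used above:

#include <cstdint>
#include <utility>
#include <vector>

// Minimal ULEB128 encoder, the encoding used for DW_RLE_offset_pair operands.
static void appendULEB128(std::vector<uint8_t> &Out, uint64_t Value) {
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value)
      Byte |= 0x80;
    Out.push_back(Byte);
  } while (Value);
}

// Encode one range list: a base address entry followed by offset pairs
// relative to that base, terminated by end_of_list.
static std::vector<uint8_t>
encodeOffsetPairList(uint64_t Base,
                     const std::vector<std::pair<uint64_t, uint64_t>> &Ranges) {
  std::vector<uint8_t> Out;
  Out.push_back(5); // DW_RLE_base_address
  for (int I = 0; I < 8; ++I)
    Out.push_back(uint8_t(Base >> (8 * I))); // address operand, little-endian
  for (const auto &R : Ranges) {
    Out.push_back(4);                    // DW_RLE_offset_pair
    appendULEB128(Out, R.first - Base);  // starting offset
    appendULEB128(Out, R.second - Base); // ending offset
  }
  Out.push_back(0); // DW_RLE_end_of_list
  return Out;
}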
+// Emit the header of a DWARF 5 range list table. Returns the symbol that
+// designates the end of the table for the caller to emit when the table is
+// complete.
+static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm, DwarfFile &Holder) {
+ // The length is described by a starting label right after the length field
+ // and an end label.
+ MCSymbol *TableStart = Asm->createTempSymbol("debug_rnglist_table_start");
+ MCSymbol *TableEnd = Asm->createTempSymbol("debug_rnglist_table_end");
+ // Build the range table header, which starts with the length field.
+ Asm->EmitLabelDifference(TableEnd, TableStart, 4);
+ Asm->OutStreamer->EmitLabel(TableStart);
+ // Version number (DWARF v5 and later).
+ Asm->emitInt16(Asm->OutStreamer->getContext().getDwarfVersion());
+ // Address size.
+ Asm->emitInt8(Asm->MAI->getCodePointerSize());
+ // Segment selector size.
+ Asm->emitInt8(0);
+
+ MCSymbol *RnglistTableBaseSym = Holder.getRnglistsTableBaseSym();
+
+ // FIXME: Generate the offsets table and use DW_FORM_rnglistx with the
+ // DW_AT_ranges attribute. Until then set the number of offsets to 0.
+ Asm->emitInt32(0);
+ Asm->OutStreamer->EmitLabel(RnglistTableBaseSym);
+ return TableEnd;
+}
+
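Likewise, a self-contained sketch of the header fields emitted by emitRnglistsTableHeader, assuming the DWARF32 format (4-byte unit length) and little-endian output; the helper names are hypothetical:

#include <cstdint>
#include <vector>

// Append a little-endian integer of Width bytes.
static void appendLE(std::vector<uint8_t> &Out, uint64_t Value, unsigned Width) {
  for (unsigned I = 0; I < Width; ++I)
    Out.push_back(uint8_t(Value >> (8 * I)));
}

// Build a DWARF32 .debug_rnglists table header. The unit length counts
// everything after the length field itself: 8 header bytes (version,
// address size, segment selector size, offset entry count) plus the body.
static std::vector<uint8_t> buildRnglistsHeader(uint32_t BodySize,
                                                uint8_t AddrSize) {
  std::vector<uint8_t> Out;
  appendLE(Out, 8 + BodySize, 4); // unit length
  appendLE(Out, 5, 2);            // DWARF version
  appendLE(Out, AddrSize, 1);     // address size
  appendLE(Out, 0, 1);            // segment selector size
  appendLE(Out, 0, 4);            // offset entry count (none emitted yet)
  return Out;
}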
+/// Emit address ranges into the .debug_ranges section or into the DWARF v5
+/// .debug_rnglists section.
void DwarfDebug::emitDebugRanges() {
if (CUMap.empty())
return;
- // Start the dwarf ranges section.
- Asm->OutStreamer->SwitchSection(
- Asm->getObjFileLowering().getDwarfRangesSection());
+ auto NoRangesPresent = [this]() {
+ return llvm::all_of(
+ CUMap, [](const decltype(CUMap)::const_iterator::value_type &Pair) {
+ return Pair.second->getRangeLists().empty();
+ });
+ };
- // Size for our labels.
- unsigned char Size = Asm->MAI->getCodePointerSize();
+ if (!useRangesSection()) {
+ assert(NoRangesPresent() && "No debug ranges expected.");
+ return;
+ }
+
+ if (getDwarfVersion() >= 5 && NoRangesPresent())
+ return;
+
+ // Start the dwarf ranges section.
+ MCSymbol *TableEnd = nullptr;
+ if (getDwarfVersion() >= 5) {
+ Asm->OutStreamer->SwitchSection(
+ Asm->getObjFileLowering().getDwarfRnglistsSection());
+ TableEnd = emitRnglistsTableHeader(Asm, useSplitDwarf() ? SkeletonHolder
+ : InfoHolder);
+ } else
+ Asm->OutStreamer->SwitchSection(
+ Asm->getObjFileLowering().getDwarfRangesSection());
// Grab the specific ranges for the compile units in the module.
for (const auto &I : CUMap) {
@@ -1887,61 +2158,12 @@ void DwarfDebug::emitDebugRanges() {
TheCU = Skel;
// Iterate over the misc ranges for the compile units in the module.
- for (const RangeSpanList &List : TheCU->getRangeLists()) {
- // Emit our symbol so we can find the beginning of the range.
- Asm->OutStreamer->EmitLabel(List.getSym());
-
- // Gather all the ranges that apply to the same section so they can share
- // a base address entry.
- MapVector<const MCSection *, std::vector<const RangeSpan *>> MV;
- for (const RangeSpan &Range : List.getRanges()) {
- MV[&Range.getStart()->getSection()].push_back(&Range);
- }
-
- auto *CUBase = TheCU->getBaseAddress();
- bool BaseIsSet = false;
- for (const auto &P : MV) {
- // Don't bother with a base address entry if there's only one range in
- // this section in this range list - for example ranges for a CU will
- // usually consist of single regions from each of many sections
- // (-ffunction-sections, or just C++ inline functions) except under LTO
- // or optnone where there may be holes in a single CU's section
- // contrubutions.
- auto *Base = CUBase;
- if (!Base && P.second.size() > 1 &&
- UseDwarfRangesBaseAddressSpecifier) {
- BaseIsSet = true;
- // FIXME/use care: This may not be a useful base address if it's not
- // the lowest address/range in this object.
- Base = P.second.front()->getStart();
- Asm->OutStreamer->EmitIntValue(-1, Size);
- Asm->OutStreamer->EmitSymbolValue(Base, Size);
- } else if (BaseIsSet) {
- BaseIsSet = false;
- Asm->OutStreamer->EmitIntValue(-1, Size);
- Asm->OutStreamer->EmitIntValue(0, Size);
- }
-
- for (const auto *RS : P.second) {
- const MCSymbol *Begin = RS->getStart();
- const MCSymbol *End = RS->getEnd();
- assert(Begin && "Range without a begin symbol?");
- assert(End && "Range without an end symbol?");
- if (Base) {
- Asm->EmitLabelDifference(Begin, Base, Size);
- Asm->EmitLabelDifference(End, Base, Size);
- } else {
- Asm->OutStreamer->EmitSymbolValue(Begin, Size);
- Asm->OutStreamer->EmitSymbolValue(End, Size);
- }
- }
- }
-
- // And terminate the list with two 0 values.
- Asm->OutStreamer->EmitIntValue(0, Size);
- Asm->OutStreamer->EmitIntValue(0, Size);
- }
+ for (const RangeSpanList &List : TheCU->getRangeLists())
+ emitRangeList(Asm, TheCU, List);
}
+
+ if (TableEnd)
+ Asm->OutStreamer->EmitLabel(TableEnd);
}
void DwarfDebug::handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U) {
@@ -1963,20 +2185,17 @@ void DwarfDebug::emitMacro(DIMacro &M) {
Asm->OutStreamer->EmitBytes(Name);
if (!Value.empty()) {
// There should be one space between macro name and macro value.
- Asm->EmitInt8(' ');
+ Asm->emitInt8(' ');
Asm->OutStreamer->EmitBytes(Value);
}
- Asm->EmitInt8('\0');
+ Asm->emitInt8('\0');
}
void DwarfDebug::emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U) {
assert(F.getMacinfoType() == dwarf::DW_MACINFO_start_file);
Asm->EmitULEB128(dwarf::DW_MACINFO_start_file);
Asm->EmitULEB128(F.getLine());
- DIFile *File = F.getFile();
- unsigned FID =
- U.getOrCreateSourceID(File->getFilename(), File->getDirectory());
- Asm->EmitULEB128(FID);
+ Asm->EmitULEB128(U.getOrCreateSourceID(F.getFile()));
handleMacroNodes(F.getElements(), U);
Asm->EmitULEB128(dwarf::DW_MACINFO_end_file);
}
@@ -1995,11 +2214,14 @@ void DwarfDebug::emitDebugMacinfo() {
auto *SkCU = TheCU.getSkeleton();
DwarfCompileUnit &U = SkCU ? *SkCU : TheCU;
auto *CUNode = cast<DICompileUnit>(P.first);
- Asm->OutStreamer->EmitLabel(U.getMacroLabelBegin());
- handleMacroNodes(CUNode->getMacros(), U);
+ DIMacroNodeArray Macros = CUNode->getMacros();
+ if (!Macros.empty()) {
+ Asm->OutStreamer->EmitLabel(U.getMacroLabelBegin());
+ handleMacroNodes(Macros, U);
+ }
}
Asm->OutStreamer->AddComment("End Of Macro List Mark");
- Asm->EmitInt8(0);
+ Asm->emitInt8(0);
}
// DWARF5 Experimental Separate Dwarf emitters.
@@ -2017,9 +2239,6 @@ void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die,
SkeletonHolder.addUnit(std::move(NewU));
}
-// This DIE has the following attributes: DW_AT_comp_dir, DW_AT_stmt_list,
-// DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges, DW_AT_dwo_name, DW_AT_dwo_id,
-// DW_AT_addr_base, DW_AT_ranges_base.
DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) {
auto OwnedUnit = llvm::make_unique<DwarfCompileUnit>(
@@ -2029,6 +2248,9 @@ DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) {
NewCU.initStmtList();
+ if (useSegmentedStringOffsetsTable())
+ NewCU.addStringOffsetsStart();
+
initSkeletonUnit(CU, NewCU.getUnitDie(), std::move(OwnedUnit));
return NewCU;
@@ -2051,26 +2273,37 @@ void DwarfDebug::emitDebugAbbrevDWO() {
void DwarfDebug::emitDebugLineDWO() {
assert(useSplitDwarf() && "No split dwarf?");
- Asm->OutStreamer->SwitchSection(
+ SplitTypeUnitFileTable.Emit(
+ *Asm->OutStreamer, MCDwarfLineTableParams(),
Asm->getObjFileLowering().getDwarfLineDWOSection());
- SplitTypeUnitFileTable.Emit(*Asm->OutStreamer, MCDwarfLineTableParams());
+}
+
+void DwarfDebug::emitStringOffsetsTableHeaderDWO() {
+ assert(useSplitDwarf() && "No split dwarf?");
+ InfoHolder.getStringPool().emitStringOffsetsTableHeader(
+ *Asm, Asm->getObjFileLowering().getDwarfStrOffDWOSection(),
+ InfoHolder.getStringOffsetsStartSym());
}
// Emit the .debug_str.dwo section for separated dwarf. This contains the
// string section and is identical in format to traditional .debug_str
// sections.
void DwarfDebug::emitDebugStrDWO() {
+ if (useSegmentedStringOffsetsTable())
+ emitStringOffsetsTableHeaderDWO();
assert(useSplitDwarf() && "No split dwarf?");
MCSection *OffSec = Asm->getObjFileLowering().getDwarfStrOffDWOSection();
InfoHolder.emitStrings(Asm->getObjFileLowering().getDwarfStrDWOSection(),
- OffSec);
+ OffSec, /* UseRelativeOffsets = */ false);
}
MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) {
if (!useSplitDwarf())
return nullptr;
- if (SingleCU)
- SplitTypeUnitFileTable.setCompilationDir(CU.getCUNode()->getDirectory());
+ const DICompileUnit *DIUnit = CU.getCUNode();
+ SplitTypeUnitFileTable.maybeSetRootFile(
+ DIUnit->getDirectory(), DIUnit->getFilename(),
+ CU.getMD5AsBytes(DIUnit->getFile()), DIUnit->getSource());
return &SplitTypeUnitFileTable;
}
@@ -2119,10 +2352,16 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
if (useSplitDwarf())
NewTU.setSection(Asm->getObjFileLowering().getDwarfTypesDWOSection());
else {
- CU.applyStmtList(UnitDie);
NewTU.setSection(Asm->getObjFileLowering().getDwarfTypesSection(Signature));
+ // Non-split type units reuse the compile unit's line table.
+ CU.applyStmtList(UnitDie);
}
+ // Add DW_AT_str_offsets_base to the type unit DIE, but not for split type
+ // units.
+ if (useSegmentedStringOffsetsTable() && !useSplitDwarf())
+ NewTU.addStringOffsetsStart();
+
NewTU.setType(NewTU.createTypeDIE(CTy));
if (TopLevelType) {
@@ -2157,32 +2396,50 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
CU.addDIETypeSignature(RefDie, Signature);
}
-// Accelerator table mutators - add each name along with its companion
-// DIE to the proper table while ensuring that the name that we're going
-// to reference is in the string table. We do this since the names we
-// add may not only be identical to the names in the DIE.
-void DwarfDebug::addAccelName(StringRef Name, const DIE &Die) {
- if (!useDwarfAccelTables())
+// Add the Name along with its companion DIE to the appropriate accelerator
+// table (for AccelTableKind::Dwarf it's always AccelDebugNames, for
+// AccelTableKind::Apple, we use the table we got as an argument). If
+// accelerator tables are disabled, this function does nothing.
+template <typename DataT>
+void DwarfDebug::addAccelNameImpl(AccelTable<DataT> &AppleAccel, StringRef Name,
+ const DIE &Die) {
+ if (getAccelTableKind() == AccelTableKind::None)
return;
- AccelNames.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die);
+
+ DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+ DwarfStringPoolEntryRef Ref =
+ Holder.getStringPool().getEntry(*Asm, Name);
+
+ switch (getAccelTableKind()) {
+ case AccelTableKind::Apple:
+ AppleAccel.addName(Ref, Die);
+ break;
+ case AccelTableKind::Dwarf:
+ AccelDebugNames.addName(Ref, Die);
+ break;
+ case AccelTableKind::Default:
+ llvm_unreachable("Default should have already been resolved.");
+ case AccelTableKind::None:
+ llvm_unreachable("None handled above");
+ }
+}
+
+void DwarfDebug::addAccelName(StringRef Name, const DIE &Die) {
+ addAccelNameImpl(AccelNames, Name, Die);
}
void DwarfDebug::addAccelObjC(StringRef Name, const DIE &Die) {
- if (!useDwarfAccelTables())
- return;
- AccelObjC.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die);
+ // ObjC names go only into the Apple accelerator tables.
+ if (getAccelTableKind() == AccelTableKind::Apple)
+ addAccelNameImpl(AccelObjC, Name, Die);
}
void DwarfDebug::addAccelNamespace(StringRef Name, const DIE &Die) {
- if (!useDwarfAccelTables())
- return;
- AccelNamespace.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die);
+ addAccelNameImpl(AccelNamespace, Name, Die);
}
void DwarfDebug::addAccelType(StringRef Name, const DIE &Die, char Flags) {
- if (!useDwarfAccelTables())
- return;
- AccelTypes.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die);
+ addAccelNameImpl(AccelTypes, Name, Die);
}
uint16_t DwarfDebug::getDwarfVersion() const {
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 2ae0b418a91e..0c7be5d27dfe 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -18,7 +18,6 @@
#include "DbgValueHistoryCalculator.h"
#include "DebugHandlerBase.h"
#include "DebugLocStream.h"
-#include "DwarfAccelTable.h"
#include "DwarfFile.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
@@ -31,6 +30,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/AccelTable.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
@@ -192,6 +192,14 @@ struct SymbolCU {
DwarfCompileUnit *CU;
};
+/// The kind of accelerator tables we should emit.
+enum class AccelTableKind {
+ Default, ///< Platform default.
+ None, ///< None.
+ Apple, ///< .apple_names, .apple_namespaces, .apple_types, .apple_objc.
+ Dwarf, ///< DWARF v5 .debug_names.
+};
+
/// Collects and handles dwarf debug information.
class DwarfDebug : public DebugHandlerBase {
/// All DIEValues are allocated through this allocator.
@@ -255,12 +263,37 @@ class DwarfDebug : public DebugHandlerBase {
/// Whether to emit all linkage names, or just abstract subprograms.
bool UseAllLinkageNames;
+ /// Use inlined strings.
+ bool UseInlineStrings = false;
+
+ /// Whether to emit DWARF pub sections or not.
+ bool UsePubSections = true;
+
+ /// Allow emission of .debug_ranges section.
+ bool UseRangesSection = true;
+
+ /// True if the sections themselves must be used as references, rather than
+ /// creating temp symbols inside DWARF sections.
+ bool UseSectionsAsReferences = false;
+
+ /// Allow emission of the .debug_loc section.
+ bool UseLocSection = true;
+
+ /// Generate DWARF v4 type units.
+ bool GenerateTypeUnits;
+
/// DWARF5 Experimental Options
/// @{
- bool HasDwarfAccelTables;
+ AccelTableKind TheAccelTableKind;
bool HasAppleExtensionAttributes;
bool HasSplitDwarf;
+ /// Whether to generate the DWARF v5 string offsets table.
+ /// It consists of a series of contributions, each preceded by a header.
+ /// The pre-DWARF v5 string offsets table for split dwarf is, in contrast,
+ /// a monolithic sequence of string offsets.
+ bool UseSegmentedStringOffsetsTable;
+
/// Separated Dwarf Variables
/// In general these will all be for bits that are left in the
/// original object file, rather than things that are meant
@@ -283,10 +316,12 @@ class DwarfDebug : public DebugHandlerBase {
AddressPool AddrPool;
- DwarfAccelTable AccelNames;
- DwarfAccelTable AccelObjC;
- DwarfAccelTable AccelNamespace;
- DwarfAccelTable AccelTypes;
+ /// Accelerator tables.
+ AccelTable<DWARF5AccelTableData> AccelDebugNames;
+ AccelTable<AppleAccelTableOffsetData> AccelNames;
+ AccelTable<AppleAccelTableOffsetData> AccelObjC;
+ AccelTable<AppleAccelTableOffsetData> AccelNamespace;
+ AccelTable<AppleAccelTableTypeData> AccelTypes;
// Identify a debugger for "tuning" the debug info.
DebuggerKind DebuggerTuning = DebuggerKind::Default;
@@ -299,9 +334,9 @@ class DwarfDebug : public DebugHandlerBase {
using InlinedVariable = DbgValueHistoryMap::InlinedVariable;
- void ensureAbstractVariableIsCreated(DwarfCompileUnit &CU, InlinedVariable Var,
+ void ensureAbstractVariableIsCreated(DwarfCompileUnit &CU, InlinedVariable IV,
const MDNode *Scope);
- void ensureAbstractVariableIsCreatedIfScoped(DwarfCompileUnit &CU, InlinedVariable Var,
+ void ensureAbstractVariableIsCreatedIfScoped(DwarfCompileUnit &CU, InlinedVariable IV,
const MDNode *Scope);
DbgVariable *createConcreteVariable(DwarfCompileUnit &TheCU,
@@ -310,6 +345,10 @@ class DwarfDebug : public DebugHandlerBase {
/// Construct a DIE for this abstract scope.
void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, LexicalScope *Scope);
+ template <typename DataT>
+ void addAccelNameImpl(AccelTable<DataT> &AppleAccel, StringRef Name,
+ const DIE &Die);
+
void finishVariableDefinitions();
void finishSubprogramDefinitions();
@@ -324,9 +363,15 @@ class DwarfDebug : public DebugHandlerBase {
/// Emit the abbreviation section.
void emitAbbreviations();
+ /// Emit the string offsets table header.
+ void emitStringOffsetsTableHeader();
+
/// Emit a specified accelerator table.
- void emitAccel(DwarfAccelTable &Accel, MCSection *Section,
- StringRef TableName);
+ template <typename AccelTableT>
+ void emitAccel(AccelTableT &Accel, MCSection *Section, StringRef TableName);
+
+ /// Emit DWARF v5 accelerator table.
+ void emitAccelDebugNames();
/// Emit visible names into a hashed accelerator table section.
void emitAccelNames();
@@ -363,6 +408,9 @@ class DwarfDebug : public DebugHandlerBase {
/// Emit address ranges into a debug ranges section.
void emitDebugRanges();
+ /// Emit range lists into a DWARF v5 debug rnglists section.
+ void emitDebugRnglists();
+
/// Emit macros into a debug macinfo section.
void emitDebugMacinfo();
void emitMacro(DIMacro &M);
@@ -375,8 +423,13 @@ class DwarfDebug : public DebugHandlerBase {
void initSkeletonUnit(const DwarfUnit &U, DIE &Die,
std::unique_ptr<DwarfCompileUnit> NewU);
- /// Construct the split debug info compile unit for the debug info
- /// section.
+ /// Construct the split debug info compile unit for the debug info section.
+ /// In DWARF v5, the skeleton unit DIE may have the following attributes:
+ /// DW_AT_addr_base, DW_AT_comp_dir, DW_AT_dwo_name, DW_AT_high_pc,
+ /// DW_AT_low_pc, DW_AT_ranges, DW_AT_stmt_list, and DW_AT_str_offsets_base.
+ /// Prior to DWARF v5 it may also have DW_AT_GNU_dwo_id. DW_AT_GNU_dwo_name
+ /// is used instead of DW_AT_dwo_name, DW_AT_GNU_addr_base instead of
+ /// DW_AT_addr_base, and DW_AT_GNU_ranges_base instead of DW_AT_rnglists_base.
DwarfCompileUnit &constructSkeletonCU(const DwarfCompileUnit &CU);
/// Emit the debug info dwo section.
@@ -388,6 +441,9 @@ class DwarfDebug : public DebugHandlerBase {
/// Emit the debug line dwo section.
void emitDebugLineDWO();
+ /// Emit the dwo string offsets table header.
+ void emitStringOffsetsTableHeaderDWO();
+
/// Emit the debug str dwo section.
void emitDebugStrDWO();
@@ -422,6 +478,9 @@ class DwarfDebug : public DebugHandlerBase {
void collectVariableInfoFromMFTable(DwarfCompileUnit &TheCU,
DenseSet<InlinedVariable> &P);
+ /// Emit the reference to the section.
+ void emitSectionReference(const DwarfCompileUnit &CU);
+
protected:
/// Gather pre-function debug information.
void beginFunctionImpl(const MachineFunction *MF) override;
@@ -478,11 +537,30 @@ public:
/// DWARF4 format.
bool useDWARF2Bitfields() const { return UseDWARF2Bitfields; }
+ /// Returns whether to use inline strings.
+ bool useInlineStrings() const { return UseInlineStrings; }
+
+ /// Returns whether GNU pub sections should be emitted.
+ bool usePubSections() const { return UsePubSections; }
+
+ /// Returns whether ranges section should be emitted.
+ bool useRangesSection() const { return UseRangesSection; }
+
+ /// Returns whether to use sections as labels rather than temp symbols.
+ bool useSectionsAsReferences() const {
+ return UseSectionsAsReferences;
+ }
+
+ /// Returns whether .debug_loc section should be emitted.
+ bool useLocSection() const { return UseLocSection; }
+
+ /// Returns whether to generate DWARF v4 type units.
+ bool generateTypeUnits() const { return GenerateTypeUnits; }
+
// Experimental DWARF5 features.
- /// Returns whether or not to emit tables that dwarf consumers can
- /// use to accelerate lookup.
- bool useDwarfAccelTables() const { return HasDwarfAccelTables; }
+ /// Returns what kind (if any) of accelerator tables to emit.
+ AccelTableKind getAccelTableKind() const { return TheAccelTableKind; }
bool useAppleExtensionAttributes() const {
return HasAppleExtensionAttributes;
@@ -492,6 +570,16 @@ public:
/// split dwarf proposal support.
bool useSplitDwarf() const { return HasSplitDwarf; }
+ /// Returns whether to generate a string offsets table with (possibly shared)
+ /// contributions from each CU and type unit. This implies the use of
+ /// DW_FORM_strx* indirect references with DWARF v5 and beyond. Note that
+ /// DW_FORM_GNU_str_index is also an indirect reference, but it is used with
+ /// a pre-DWARF v5 implementation of split DWARF sections, which uses a
+ /// monolithic string offsets table.
+ bool useSegmentedStringOffsetsTable() const {
+ return UseSegmentedStringOffsetsTable;
+ }
+
bool shareAcrossDWOCUs() const;
/// Returns the Dwarf Version.
@@ -537,6 +625,9 @@ public:
/// Find the matching DwarfCompileUnit for the given CU DIE.
DwarfCompileUnit *lookupCU(const DIE *Die) { return CUDieMap.lookup(Die); }
+ const DwarfCompileUnit *lookupCU(const DIE *Die) const {
+ return CUDieMap.lookup(Die);
+ }
/// \defgroup DebuggerTuning Predicates to tune DWARF for a given debugger.
///
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
index 80d5bd208ed8..b57ea8fc6322 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -70,7 +70,7 @@ public:
};
class LLVM_LIBRARY_VISIBILITY ARMException : public DwarfCFIExceptionBase {
- void emitTypeInfos(unsigned TTypeEncoding) override;
+ void emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) override;
ARMTargetStreamer &getTargetStreamer();
public:
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index 68d25fe37b43..d8d1a5e8f841 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -123,7 +123,10 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI,
const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(MachineReg);
unsigned RegSize = TRI.getRegSizeInBits(*RC);
// Keep track of the bits in the register we already emitted, so we
- // can avoid emitting redundant aliasing subregs.
+ // can avoid emitting redundant aliasing subregs. Because this is
+ // just doing a greedy scan of all subregisters, it is possible that
+ // this doesn't find a combination of subregisters that fully cover
+ // the register (even though one may exist).
SmallBitVector Coverage(RegSize, false);
for (MCSubRegIterator SR(MachineReg, &TRI); SR.isValid(); ++SR) {
unsigned Idx = TRI.getSubRegIndex(MachineReg, *SR);
@@ -143,7 +146,7 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI,
if (CurSubReg.test(Coverage)) {
// Emit a piece for any gap in the coverage.
if (Offset > CurPos)
- DwarfRegs.push_back({-1, Offset - CurPos, nullptr});
+ DwarfRegs.push_back({-1, Offset - CurPos, "no DWARF register encoding"});
DwarfRegs.push_back(
{Reg, std::min<unsigned>(Size, MaxSize - Offset), "sub-register"});
if (Offset >= MaxSize)
@@ -154,8 +157,13 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI,
CurPos = Offset + Size;
}
}
-
- return CurPos;
+ // Failed to find any DWARF encoding.
+ if (CurPos == 0)
+ return false;
+ // Found a partial or complete DWARF encoding.
+ if (CurPos < RegSize)
+ DwarfRegs.push_back({-1, RegSize - CurPos, "no DWARF register encoding"});
+ return true;
}
void DwarfExpression::addStackValue() {
@@ -341,11 +349,22 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
case dwarf::DW_OP_plus:
case dwarf::DW_OP_minus:
case dwarf::DW_OP_mul:
+ case dwarf::DW_OP_div:
+ case dwarf::DW_OP_mod:
+ case dwarf::DW_OP_or:
+ case dwarf::DW_OP_and:
+ case dwarf::DW_OP_xor:
+ case dwarf::DW_OP_shl:
+ case dwarf::DW_OP_shr:
+ case dwarf::DW_OP_shra:
+ case dwarf::DW_OP_lit0:
+ case dwarf::DW_OP_not:
+ case dwarf::DW_OP_dup:
emitOp(Op->getOp());
break;
case dwarf::DW_OP_deref:
assert(LocationKind != Register);
- if (LocationKind != Memory && isMemoryLocation(ExprCursor))
+ if (LocationKind != Memory && ::isMemoryLocation(ExprCursor))
// Turning this into a memory location description makes the deref
// implicit.
LocationKind = Memory;
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
index ea5cbc40ba35..952b0d99a95a 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -211,6 +211,9 @@ public:
/// Emit an unsigned constant.
void addUnsignedConstant(const APInt &Value);
+ bool isMemoryLocation() const { return LocationKind == Memory; }
+ bool isUnknownLocation() const { return LocationKind == Unknown; }
+
/// Lock this down to become a memory location description.
void setMemoryLocationKind() {
assert(LocationKind == Unknown);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index 3c04c969192d..c90bd568162d 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -77,42 +77,24 @@ unsigned DwarfFile::computeSizeAndOffset(DIE &Die, unsigned Offset) {
void DwarfFile::emitAbbrevs(MCSection *Section) { Abbrevs.Emit(Asm, Section); }
// Emit strings into a string section.
-void DwarfFile::emitStrings(MCSection *StrSection, MCSection *OffsetSection) {
- StrPool.emit(*Asm, StrSection, OffsetSection);
+void DwarfFile::emitStrings(MCSection *StrSection, MCSection *OffsetSection,
+ bool UseRelativeOffsets) {
+ StrPool.emit(*Asm, StrSection, OffsetSection, UseRelativeOffsets);
}
bool DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
- SmallVectorImpl<DbgVariable *> &Vars = ScopeVariables[LS];
+ auto &ScopeVars = ScopeVariables[LS];
const DILocalVariable *DV = Var->getVariable();
- // Variables with positive arg numbers are parameters.
if (unsigned ArgNum = DV->getArg()) {
- // Keep all parameters in order at the start of the variable list to ensure
- // function types are correct (no out-of-order parameters)
- //
- // This could be improved by only doing it for optimized builds (unoptimized
- // builds have the right order to begin with), searching from the back (this
- // would catch the unoptimized case quickly), or doing a binary search
- // rather than linear search.
- auto I = Vars.begin();
- while (I != Vars.end()) {
- unsigned CurNum = (*I)->getVariable()->getArg();
- // A local (non-parameter) variable has been found, insert immediately
- // before it.
- if (CurNum == 0)
- break;
- // A later indexed parameter has been found, insert immediately before it.
- if (CurNum > ArgNum)
- break;
- if (CurNum == ArgNum) {
- (*I)->addMMIEntry(*Var);
- return false;
- }
- ++I;
+ auto Cached = ScopeVars.Args.find(ArgNum);
+ if (Cached == ScopeVars.Args.end())
+ ScopeVars.Args[ArgNum] = Var;
+ else {
+ Cached->second->addMMIEntry(*Var);
+ return false;
}
- Vars.insert(I, Var);
- return true;
- }
-
- Vars.push_back(Var);
+ } else {
+ ScopeVars.Locals.push_back(Var);
+ }
return true;
}
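As a rough standalone model of the Args/Locals split introduced above (names here are hypothetical): a std::map keyed by argument number gives sorted iteration over parameters and duplicate detection for the merge case, while plain locals keep insertion order:

#include <cstdio>
#include <map>
#include <vector>

struct ScopeVarsSketch {
  std::map<unsigned, const char *> Args; // parameters, keyed by argument number
  std::vector<const char *> Locals;      // non-parameter variables, in order

  // Returns false when an argument with the same number already exists,
  // i.e. the case where the real code merges MMI entries instead.
  bool add(unsigned ArgNum, const char *Name) {
    if (ArgNum == 0) {
      Locals.push_back(Name);
      return true;
    }
    return Args.emplace(ArgNum, Name).second;
  }
};

int main() {
  ScopeVarsSketch S;
  S.add(2, "second_param");
  S.add(1, "first_param");
  S.add(0, "local");
  for (const auto &P : S.Args) // iterates arg 1, then arg 2
    std::printf("arg %u: %s\n", P.first, P.second);
  std::printf("local: %s\n", S.Locals.front());
}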
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
index 167ca13c19c1..8dfbc4e1c434 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/DIE.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Allocator.h"
+#include <map>
#include <memory>
#include <utility>
@@ -43,8 +44,23 @@ class DwarfFile {
DwarfStringPool StrPool;
- // Collection of dbg variables of a scope.
- DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8>> ScopeVariables;
+ /// DWARF v5: The symbol that designates the start of the contribution to
+ /// the string offsets table. The contribution is shared by all units.
+ MCSymbol *StringOffsetsStartSym = nullptr;
+
+ /// DWARF v5: The symbol that designates the base of the range list table.
+ /// The table is shared by all units.
+ MCSymbol *RnglistsTableBaseSym = nullptr;
+
+ /// The variables of a lexical scope.
+ struct ScopeVars {
+ /// We need to sort Args by ArgNo and check for duplicates. This could also
+ /// be implemented as a list or vector + std::lower_bound().
+ std::map<unsigned, DbgVariable *> Args;
+ SmallVector<DbgVariable *, 8> Locals;
+ };
+ /// Collection of DbgVariables of each lexical scope.
+ DenseMap<LexicalScope *, ScopeVars> ScopeVariables;
// Collection of abstract subprogram DIEs.
DenseMap<const MDNode *, DIE *> AbstractSPDies;
@@ -62,39 +78,51 @@ public:
return CUs;
}
- /// \brief Compute the size and offset of a DIE given an incoming Offset.
+ /// Compute the size and offset of a DIE given an incoming Offset.
unsigned computeSizeAndOffset(DIE &Die, unsigned Offset);
- /// \brief Compute the size and offset of all the DIEs.
+ /// Compute the size and offset of all the DIEs.
void computeSizeAndOffsets();
- /// \brief Compute the size and offset of all the DIEs in the given unit.
+ /// Compute the size and offset of all the DIEs in the given unit.
/// \returns The size of the root DIE.
unsigned computeSizeAndOffsetsForUnit(DwarfUnit *TheU);
- /// \brief Add a unit to the list of CUs.
+ /// Add a unit to the list of CUs.
void addUnit(std::unique_ptr<DwarfCompileUnit> U);
- /// \brief Emit all of the units to the section listed with the given
+ /// Emit all of the units to the section listed with the given
/// abbreviation section.
void emitUnits(bool UseOffsets);
- /// \brief Emit the given unit to its section.
+ /// Emit the given unit to its section.
void emitUnit(DwarfUnit *U, bool UseOffsets);
- /// \brief Emit a set of abbreviations to the specific section.
+ /// Emit a set of abbreviations to the specific section.
void emitAbbrevs(MCSection *);
- /// \brief Emit all of the strings to the section given.
- void emitStrings(MCSection *StrSection, MCSection *OffsetSection = nullptr);
+ /// Emit all of the strings to the section given. If OffsetSection is
+ /// non-null, emit a table of string offsets to it. If UseRelativeOffsets
+ /// is false, emit absolute offsets to the strings. Otherwise, emit
+ /// relocatable references to the strings if they are supported by the target.
+ void emitStrings(MCSection *StrSection, MCSection *OffsetSection = nullptr,
+ bool UseRelativeOffsets = false);
- /// \brief Returns the string pool.
+ /// Returns the string pool.
DwarfStringPool &getStringPool() { return StrPool; }
+ MCSymbol *getStringOffsetsStartSym() const { return StringOffsetsStartSym; }
+
+ void setStringOffsetsStartSym(MCSymbol *Sym) { StringOffsetsStartSym = Sym; }
+
+ MCSymbol *getRnglistsTableBaseSym() const { return RnglistsTableBaseSym; }
+
+ void setRnglistsTableBaseSym(MCSymbol *Sym) { RnglistsTableBaseSym = Sym; }
+
/// \returns false if the variable was merged with a previous one.
bool addScopeVariable(LexicalScope *LS, DbgVariable *Var);
- DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8>> &getScopeVariables() {
+ DenseMap<LexicalScope *, ScopeVars> &getScopeVariables() {
return ScopeVariables;
}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
index aa5f01e88933..a61fa83cfb03 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
@@ -39,8 +39,30 @@ DwarfStringPool::EntryRef DwarfStringPool::getEntry(AsmPrinter &Asm,
return EntryRef(*I.first);
}
+void DwarfStringPool::emitStringOffsetsTableHeader(AsmPrinter &Asm,
+ MCSection *Section,
+ MCSymbol *StartSym) {
+ if (empty())
+ return;
+ Asm.OutStreamer->SwitchSection(Section);
+ unsigned EntrySize = 4;
+ // FIXME: DWARF64
+ // We are emitting the header for a contribution to the string offsets
+ // table. The header consists of an entry with the contribution's
+ // size (not including the size of the length field), the DWARF version and
+ // 2 bytes of padding.
+ Asm.emitInt32(size() * EntrySize + 4);
+ Asm.emitInt16(Asm.getDwarfVersion());
+ Asm.emitInt16(0);
+ // Define the symbol that marks the start of the contribution. It is
+ // referenced by most unit headers via DW_AT_str_offsets_base.
+ // Split units do not use the attribute.
+ if (StartSym)
+ Asm.OutStreamer->EmitLabel(StartSym);
+}
+
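For illustration, the contribution header built just above can be modeled standalone; the names are hypothetical, and it assumes DWARF32 with 4-byte offset entries, so the length field is size() * 4 plus the 4 bytes of version and padding:

#include <cstdint>

// Header of one .debug_str_offsets contribution (DWARF32).
struct StrOffsetsHeaderSketch {
  uint32_t Length;  // NumStrings * 4 + 4; excludes the length field itself
  uint16_t Version; // DWARF version, e.g. 5
  uint16_t Padding; // always 0
};

static StrOffsetsHeaderSketch makeHeader(uint32_t NumStrings,
                                         uint16_t DwarfVersion) {
  return {NumStrings * 4u + 4u, DwarfVersion, 0};
}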
void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection,
- MCSection *OffsetSection) {
+ MCSection *OffsetSection, bool UseRelativeOffsets) {
if (Pool.empty())
return;
@@ -74,6 +96,9 @@ void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection,
Asm.OutStreamer->SwitchSection(OffsetSection);
unsigned size = 4; // FIXME: DWARF64 is 8.
for (const auto &Entry : Entries)
- Asm.OutStreamer->EmitIntValue(Entry->getValue().Offset, size);
+ if (UseRelativeOffsets)
+ Asm.emitDwarfStringOffset(Entry->getValue());
+ else
+ Asm.OutStreamer->EmitIntValue(Entry->getValue().Offset, size);
}
}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h
index 1cac3b7c8432..6e6988ea4ad4 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h
@@ -19,6 +19,7 @@ namespace llvm {
class AsmPrinter;
class MCSection;
+class MCSymbol;
// Collection of strings for this unit and assorted symbols.
// A String->Symbol mapping of strings used by indirect
@@ -36,11 +37,17 @@ public:
DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm, StringRef Prefix);
+ void emitStringOffsetsTableHeader(AsmPrinter &Asm, MCSection *OffsetSection,
+ MCSymbol *StartSym);
+
void emit(AsmPrinter &Asm, MCSection *StrSection,
- MCSection *OffsetSection = nullptr);
+ MCSection *OffsetSection = nullptr,
+ bool UseRelativeOffsets = false);
bool empty() const { return Pool.empty(); }
+ unsigned size() const { return Pool.size(); }
+
/// Get a reference to an entry in the string pool.
EntryRef getEntry(AsmPrinter &Asm, StringRef Str);
};
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 4ea59f504bd4..43b835b2c4aa 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -19,10 +19,10 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/None.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/TargetLoweringObjectFile.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Constants.h"
@@ -30,12 +30,14 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Metadata.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include <cassert>
#include <cstdint>
#include <string>
@@ -45,11 +47,6 @@ using namespace llvm;
#define DEBUG_TYPE "dwarfdebug"
-static cl::opt<bool>
-GenerateDwarfTypeUnits("generate-type-units", cl::Hidden,
- cl::desc("Generate DWARF4 type units."),
- cl::init(false));
-
DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU,
DIELoc &DIE)
: DwarfExpression(AP.getDwarfVersion()), AP(AP), DU(DU),
@@ -83,8 +80,6 @@ DwarfTypeUnit::DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A,
MCDwarfDwoLineTable *SplitLineTable)
: DwarfUnit(dwarf::DW_TAG_type_unit, CU.getCUNode(), A, DW, DWU), CU(CU),
SplitLineTable(SplitLineTable) {
- if (SplitLineTable)
- addSectionOffset(getUnitDie(), dwarf::DW_AT_stmt_list, 0);
}
DwarfUnit::~DwarfUnit() {
@@ -185,7 +180,7 @@ bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const {
return false;
return (isa<DIType>(D) ||
(isa<DISubprogram>(D) && !cast<DISubprogram>(D)->isDefinition())) &&
- !GenerateDwarfTypeUnits;
+ !DD->generateTypeUnits();
}
DIE *DwarfUnit::getDIE(const DINode *D) const {
@@ -239,9 +234,28 @@ void DwarfUnit::addSInt(DIELoc &Die, Optional<dwarf::Form> Form,
void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute,
StringRef String) {
- Die.addValue(DIEValueAllocator, Attribute,
- isDwoUnit() ? dwarf::DW_FORM_GNU_str_index : dwarf::DW_FORM_strp,
- DIEString(DU->getStringPool().getEntry(*Asm, String)));
+ if (DD->useInlineStrings()) {
+ Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_string,
+ new (DIEValueAllocator)
+ DIEInlineString(String, DIEValueAllocator));
+ return;
+ }
+ auto StringPoolEntry = DU->getStringPool().getEntry(*Asm, String);
+ dwarf::Form IxForm =
+ isDwoUnit() ? dwarf::DW_FORM_GNU_str_index : dwarf::DW_FORM_strp;
+ // For DWARF v5 and beyond, use the smallest strx? form possible.
+ if (useSegmentedStringOffsetsTable()) {
+ IxForm = dwarf::DW_FORM_strx1;
+ unsigned Index = StringPoolEntry.getIndex();
+ if (Index > 0xffffff)
+ IxForm = dwarf::DW_FORM_strx4;
+ else if (Index > 0xffff)
+ IxForm = dwarf::DW_FORM_strx3;
+ else if (Index > 0xff)
+ IxForm = dwarf::DW_FORM_strx2;
+ }
+ Die.addValue(DIEValueAllocator, Attribute, IxForm,
+ DIEString(StringPoolEntry));
}
DIEValueList::value_iterator DwarfUnit::addLabel(DIEValueList &Die,
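Restating the form-selection rule from addString as a standalone helper (hypothetical name): pick the narrowest DW_FORM_strxN whose operand can hold the string's index, returning the operand width in bytes:

#include <cstdint>

static unsigned strxOperandWidth(uint32_t Index) {
  if (Index > 0xffffff)
    return 4; // DW_FORM_strx4
  if (Index > 0xffff)
    return 3; // DW_FORM_strx3
  if (Index > 0xff)
    return 2; // DW_FORM_strx2
  return 1;   // DW_FORM_strx1
}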
@@ -263,9 +277,33 @@ void DwarfUnit::addSectionOffset(DIE &Die, dwarf::Attribute Attribute,
addUInt(Die, Attribute, dwarf::DW_FORM_data4, Integer);
}
-unsigned DwarfTypeUnit::getOrCreateSourceID(StringRef FileName, StringRef DirName) {
- return SplitLineTable ? SplitLineTable->getFile(DirName, FileName)
- : getCU().getOrCreateSourceID(FileName, DirName);
+MD5::MD5Result *DwarfUnit::getMD5AsBytes(const DIFile *File) const {
+ assert(File);
+ if (DD->getDwarfVersion() < 5)
+ return nullptr;
+ Optional<DIFile::ChecksumInfo<StringRef>> Checksum = File->getChecksum();
+ if (!Checksum || Checksum->Kind != DIFile::CSK_MD5)
+ return nullptr;
+
+ // Convert the string checksum to an MD5Result for the streamer.
+ // The verifier validates the checksum so we assume it's okay.
+ // An MD5 checksum is 16 bytes.
+ std::string ChecksumString = fromHex(Checksum->Value);
+ void *CKMem = Asm->OutStreamer->getContext().allocate(16, 1);
+ memcpy(CKMem, ChecksumString.data(), 16);
+ return reinterpret_cast<MD5::MD5Result *>(CKMem);
+}
+
+unsigned DwarfTypeUnit::getOrCreateSourceID(const DIFile *File) {
+ if (!SplitLineTable)
+ return getCU().getOrCreateSourceID(File);
+ if (!UsedLineTable) {
+ UsedLineTable = true;
+ // This is a split type unit that needs a line table.
+ addSectionOffset(getUnitDie(), dwarf::DW_AT_stmt_list, 0);
+ }
+ return SplitLineTable->getFile(File->getDirectory(), File->getFilename(),
+ getMD5AsBytes(File), File->getSource());
}
void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) {
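A self-contained sketch of the hex-to-bytes conversion performed by getMD5AsBytes (the patch itself uses llvm::fromHex and the MCContext allocator); the helper name is hypothetical and the input is assumed to be a validated 32-character MD5 string:

#include <array>
#include <cstdint>
#include <string>

static std::array<uint8_t, 16> md5HexToBytes(const std::string &Hex) {
  auto Nibble = [](char C) -> uint8_t {
    if (C >= '0' && C <= '9')
      return C - '0';
    if (C >= 'a' && C <= 'f')
      return C - 'a' + 10;
    return C - 'A' + 10;
  };
  std::array<uint8_t, 16> Bytes{};
  for (unsigned I = 0; I < 16; ++I)
    Bytes[I] = uint8_t((Nibble(Hex[2 * I]) << 4) | Nibble(Hex[2 * I + 1]));
  return Bytes;
}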
@@ -335,12 +373,11 @@ void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute,
Die.addValue(DIEValueAllocator, Attribute, Block->BestForm(), Block);
}
-void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, StringRef File,
- StringRef Directory) {
+void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, const DIFile *File) {
if (Line == 0)
return;
- unsigned FileID = getOrCreateSourceID(File, Directory);
+ unsigned FileID = getOrCreateSourceID(File);
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, None, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, None, Line);
@@ -349,32 +386,31 @@ void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, StringRef File,
void DwarfUnit::addSourceLine(DIE &Die, const DILocalVariable *V) {
assert(V);
- addSourceLine(Die, V->getLine(), V->getScope()->getFilename(),
- V->getScope()->getDirectory());
+ addSourceLine(Die, V->getLine(), V->getFile());
}
void DwarfUnit::addSourceLine(DIE &Die, const DIGlobalVariable *G) {
assert(G);
- addSourceLine(Die, G->getLine(), G->getFilename(), G->getDirectory());
+ addSourceLine(Die, G->getLine(), G->getFile());
}
void DwarfUnit::addSourceLine(DIE &Die, const DISubprogram *SP) {
assert(SP);
- addSourceLine(Die, SP->getLine(), SP->getFilename(), SP->getDirectory());
+ addSourceLine(Die, SP->getLine(), SP->getFile());
}
void DwarfUnit::addSourceLine(DIE &Die, const DIType *Ty) {
assert(Ty);
- addSourceLine(Die, Ty->getLine(), Ty->getFilename(), Ty->getDirectory());
+ addSourceLine(Die, Ty->getLine(), Ty->getFile());
}
void DwarfUnit::addSourceLine(DIE &Die, const DIObjCProperty *Ty) {
assert(Ty);
- addSourceLine(Die, Ty->getLine(), Ty->getFilename(), Ty->getDirectory());
+ addSourceLine(Die, Ty->getLine(), Ty->getFile());
}
/* Byref variables, in Blocks, are declared by the programmer as "SomeType
@@ -727,7 +763,7 @@ DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
else if (auto *STy = dyn_cast<DISubroutineType>(Ty))
constructTypeDIE(TyDIE, STy);
else if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
- if (GenerateDwarfTypeUnits && !Ty->isForwardDecl())
+ if (DD->generateTypeUnits() && !Ty->isForwardDecl())
if (MDString *TypeId = CTy->getRawIdentifier()) {
DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy);
// Skip updating the accelerator tables since this is not the full type.
@@ -917,9 +953,24 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
case dwarf::DW_TAG_enumeration_type:
constructEnumTypeDIE(Buffer, CTy);
break;
+ case dwarf::DW_TAG_variant_part:
case dwarf::DW_TAG_structure_type:
case dwarf::DW_TAG_union_type:
case dwarf::DW_TAG_class_type: {
+ // Emit the discriminator for a variant part.
+ DIDerivedType *Discriminator = nullptr;
+ if (Tag == dwarf::DW_TAG_variant_part) {
+ Discriminator = CTy->getDiscriminator();
+ if (Discriminator) {
+ // DWARF says:
+ // If the variant part has a discriminant, the discriminant is
+ // represented by a separate debugging information entry which is
+ // a child of the variant part entry.
+ DIE &DiscMember = constructMemberDIE(Buffer, Discriminator);
+ addDIEEntry(Buffer, dwarf::DW_AT_discr, DiscMember);
+ }
+ }
+
// Add elements to structure type.
DINodeArray Elements = CTy->getElements();
for (const auto *Element : Elements) {
@@ -933,6 +984,18 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
addType(ElemDie, resolve(DDTy->getBaseType()), dwarf::DW_AT_friend);
} else if (DDTy->isStaticMember()) {
getOrCreateStaticMemberDIE(DDTy);
+ } else if (Tag == dwarf::DW_TAG_variant_part) {
+ // When emitting a variant part, wrap each member in
+ // DW_TAG_variant.
+ DIE &Variant = createAndAddDIE(dwarf::DW_TAG_variant, Buffer);
+ if (const ConstantInt *CI =
+ dyn_cast_or_null<ConstantInt>(DDTy->getDiscriminantValue())) {
+ if (isUnsignedDIType(DD, resolve(Discriminator->getBaseType())))
+ addUInt(Variant, dwarf::DW_AT_discr_value, None, CI->getZExtValue());
+ else
+ addSInt(Variant, dwarf::DW_AT_discr_value, None, CI->getSExtValue());
+ }
+ constructMemberDIE(Variant, DDTy);
} else {
constructMemberDIE(Buffer, DDTy);
}
@@ -952,6 +1015,11 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
if (unsigned PropertyAttributes = Property->getAttributes())
addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, None,
PropertyAttributes);
+ } else if (auto *Composite = dyn_cast<DICompositeType>(Element)) {
+ if (Composite->getTag() == dwarf::DW_TAG_variant_part) {
+ DIE &VariantPart = createAndAddDIE(Composite->getTag(), Buffer);
+ constructTypeDIE(VariantPart, Composite);
+ }
}
}
@@ -975,6 +1043,15 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type)
addTemplateParams(Buffer, CTy->getTemplateParams());
+ // Add the type's non-standard calling convention.
+ uint8_t CC = 0;
+ if (CTy->isTypePassByValue())
+ CC = dwarf::DW_CC_pass_by_value;
+ else if (CTy->isTypePassByReference())
+ CC = dwarf::DW_CC_pass_by_reference;
+ if (CC)
+ addUInt(Buffer, dwarf::DW_AT_calling_convention, dwarf::DW_FORM_data1,
+ CC);
break;
}
default:
@@ -1152,9 +1229,8 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP,
// Look at the Decl's linkage name only if we emitted it.
if (DD->useAllLinkageNames())
DeclLinkageName = SPDecl->getLinkageName();
- unsigned DeclID =
- getOrCreateSourceID(SPDecl->getFilename(), SPDecl->getDirectory());
- unsigned DefID = getOrCreateSourceID(SP->getFilename(), SP->getDirectory());
+ unsigned DeclID = getOrCreateSourceID(SPDecl->getFile());
+ unsigned DefID = getOrCreateSourceID(SP->getFile());
if (DeclID != DefID)
addUInt(SPDie, dwarf::DW_AT_decl_file, None, DefID);
@@ -1304,14 +1380,17 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR,
// DW_AT_lower_bound and DW_AT_count attributes.
int64_t LowerBound = SR->getLowerBound();
int64_t DefaultLowerBound = getDefaultLowerBound();
- int64_t Count = SR->getCount();
+ int64_t Count = -1;
+ if (auto *CI = SR->getCount().dyn_cast<ConstantInt*>())
+ Count = CI->getSExtValue();
if (DefaultLowerBound == -1 || LowerBound != DefaultLowerBound)
addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, None, LowerBound);
- if (Count != -1)
- // FIXME: An unbounded array should reference the expression that defines
- // the array.
+ if (auto *CV = SR->getCount().dyn_cast<DIVariable*>()) {
+ if (auto *CountVarDIE = getDIE(CV))
+ addDIEEntry(DW_Subrange, dwarf::DW_AT_count, *CountVarDIE);
+ } else if (Count != -1)
addUInt(DW_Subrange, dwarf::DW_AT_count, None, Count);
}
@@ -1320,16 +1399,49 @@ DIE *DwarfUnit::getIndexTyDie() {
return IndexTyDie;
// Construct an integer type to use for indexes.
IndexTyDie = &createAndAddDIE(dwarf::DW_TAG_base_type, getUnitDie());
- addString(*IndexTyDie, dwarf::DW_AT_name, "sizetype");
+ StringRef Name = "__ARRAY_SIZE_TYPE__";
+ addString(*IndexTyDie, dwarf::DW_AT_name, Name);
addUInt(*IndexTyDie, dwarf::DW_AT_byte_size, None, sizeof(int64_t));
addUInt(*IndexTyDie, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
dwarf::DW_ATE_unsigned);
+ DD->addAccelType(Name, *IndexTyDie, /*Flags*/ 0);
return IndexTyDie;
}
+/// Returns true if the vector's size differs from the sum of sizes of elements
+/// the user specified. This can occur if the vector has been rounded up to
+/// fit memory alignment constraints.
+static bool hasVectorBeenPadded(const DICompositeType *CTy) {
+ assert(CTy && CTy->isVector() && "Composite type is not a vector");
+ const uint64_t ActualSize = CTy->getSizeInBits();
+
+ // Obtain the size of each element in the vector.
+ DIType *BaseTy = CTy->getBaseType().resolve();
+ assert(BaseTy && "Unknown vector element type.");
+ const uint64_t ElementSize = BaseTy->getSizeInBits();
+
+ // Locate the number of elements in the vector.
+ const DINodeArray Elements = CTy->getElements();
+ assert(Elements.size() == 1 &&
+ Elements[0]->getTag() == dwarf::DW_TAG_subrange_type &&
+ "Invalid vector element array, expected one element of type subrange");
+ const auto Subrange = cast<DISubrange>(Elements[0]);
+ const auto CI = Subrange->getCount().get<ConstantInt *>();
+ const int32_t NumVecElements = CI->getSExtValue();
+
+ // Ensure we found the element count and that the actual size is wide
+ // enough to contain the requested size.
+ assert(ActualSize >= (NumVecElements * ElementSize) && "Invalid vector size");
+ return ActualSize != (NumVecElements * ElementSize);
+}
+
void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
- if (CTy->isVector())
+ if (CTy->isVector()) {
addFlag(Buffer, dwarf::DW_AT_GNU_vector);
+ if (hasVectorBeenPadded(CTy))
+ addUInt(Buffer, dwarf::DW_AT_byte_size, None,
+ CTy->getSizeInBits() / CHAR_BIT);
+ }
// Emit the element type.
addType(Buffer, resolve(CTy->getBaseType()));
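The padding check and the resulting DW_AT_byte_size can be restated as a tiny worked example (hypothetical name, illustrative numbers): a 3 x 32-bit vector stored in 128 bits counts as padded, so an explicit byte size of 16 would be emitted:

#include <cstdint>

static bool vectorWasPadded(uint64_t ActualSizeInBits,
                            uint64_t ElementSizeInBits, uint64_t NumElements) {
  return ActualSizeInBits != NumElements * ElementSizeInBits;
}

// vectorWasPadded(128, 32, 3) == true, so DW_AT_byte_size = 128 / 8 = 16.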
@@ -1350,6 +1462,15 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
}
void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
+ const DIType *DTy = resolve(CTy->getBaseType());
+ bool IsUnsigned = DTy && isUnsignedDIType(DD, DTy);
+ if (DTy) {
+ if (DD->getDwarfVersion() >= 3)
+ addType(Buffer, DTy);
+ if (DD->getDwarfVersion() >= 4 && (CTy->getFlags() & DINode::FlagFixedEnum))
+ addFlag(Buffer, dwarf::DW_AT_enum_class);
+ }
+
DINodeArray Elements = CTy->getElements();
// Add enumerators to enumeration type.
@@ -1359,16 +1480,10 @@ void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
DIE &Enumerator = createAndAddDIE(dwarf::DW_TAG_enumerator, Buffer);
StringRef Name = Enum->getName();
addString(Enumerator, dwarf::DW_AT_name, Name);
- int64_t Value = Enum->getValue();
- addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata,
- Value);
+ auto Value = static_cast<uint64_t>(Enum->getValue());
+ addConstantValue(Enumerator, IsUnsigned, Value);
}
}
- const DIType *DTy = resolve(CTy->getBaseType());
- if (DTy) {
- addType(Buffer, DTy);
- addFlag(Buffer, dwarf::DW_AT_enum_class);
- }
}
void DwarfUnit::constructContainingTypeDIEs() {
@@ -1385,7 +1500,7 @@ void DwarfUnit::constructContainingTypeDIEs() {
}
}
-void DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
+DIE &DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
DIE &MemberDie = createAndAddDIE(DT->getTag(), Buffer);
StringRef Name = DT->getName();
if (!Name.empty())
@@ -1490,6 +1605,8 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
if (DT->isArtificial())
addFlag(MemberDie, dwarf::DW_AT_artificial);
+
+ return MemberDie;
}
DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) {
@@ -1542,18 +1659,18 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) {
void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) {
// Emit size of content not including length itself
Asm->OutStreamer->AddComment("Length of Unit");
- Asm->EmitInt32(getHeaderSize() + getUnitDie().getSize());
+ Asm->emitInt32(getHeaderSize() + getUnitDie().getSize());
Asm->OutStreamer->AddComment("DWARF version number");
unsigned Version = DD->getDwarfVersion();
- Asm->EmitInt16(Version);
+ Asm->emitInt16(Version);
// DWARF v5 reorders the address size and adds a unit type.
if (Version >= 5) {
Asm->OutStreamer->AddComment("DWARF Unit Type");
- Asm->EmitInt8(UT);
+ Asm->emitInt8(UT);
Asm->OutStreamer->AddComment("Address Size (in bytes)");
- Asm->EmitInt8(Asm->MAI->getCodePointerSize());
+ Asm->emitInt8(Asm->MAI->getCodePointerSize());
}
// We share one abbreviations table across all units so it's always at the
@@ -1562,14 +1679,14 @@ void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) {
Asm->OutStreamer->AddComment("Offset Into Abbrev. Section");
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
if (UseOffsets)
- Asm->EmitInt32(0);
+ Asm->emitInt32(0);
else
Asm->emitDwarfSymbolReference(
TLOF.getDwarfAbbrevSection()->getBeginSymbol(), false);
if (Version <= 4) {
Asm->OutStreamer->AddComment("Address Size (in bytes)");
- Asm->EmitInt8(Asm->MAI->getCodePointerSize());
+ Asm->emitInt8(Asm->MAI->getCodePointerSize());
}
}
@@ -1628,3 +1745,19 @@ const MCSymbol *DwarfUnit::getCrossSectionRelativeBaseAddress() const {
return nullptr;
return getSection()->getBeginSymbol();
}
+
+void DwarfUnit::addStringOffsetsStart() {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ addSectionLabel(getUnitDie(), dwarf::DW_AT_str_offsets_base,
+ DU->getStringOffsetsStartSym(),
+ TLOF.getDwarfStrOffSection()->getBeginSymbol());
+}
+
+void DwarfUnit::addRnglistsBase() {
+ assert(DD->getDwarfVersion() >= 5 &&
+ "DW_AT_rnglists_base requires DWARF version 5 or later");
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ addSectionLabel(getUnitDie(), dwarf::DW_AT_rnglists_base,
+ DU->getRnglistsTableBaseSym(),
+ TLOF.getDwarfRnglistsSection()->getBeginSymbol());
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 4cc01b3298d4..69696f626536 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -98,7 +98,7 @@ protected:
/// corresponds to the MDNode mapped with the subprogram DIE.
DenseMap<DIE *, const DINode *> ContainingTypeMap;
- DwarfUnit(dwarf::Tag, const DICompileUnit *CU, AsmPrinter *A, DwarfDebug *DW,
+ DwarfUnit(dwarf::Tag, const DICompileUnit *Node, AsmPrinter *A, DwarfDebug *DW,
DwarfFile *DWU);
bool applySubprogramDefinitionAttributes(const DISubprogram *SP, DIE &SPDie);
@@ -112,6 +112,8 @@ public:
uint16_t getLanguage() const { return CUNode->getSourceLanguage(); }
const DICompileUnit *getCUNode() const { return CUNode; }
+ uint16_t getDwarfVersion() const { return DD->getDwarfVersion(); }
+
/// Return true if this compile unit has something to write out.
bool hasContent() const { return getUnitDie().hasChildren(); }
@@ -185,7 +187,7 @@ public:
/// Add a dwarf op address data and value using the form given and an
/// op of either DW_FORM_addr or DW_FORM_GNU_addr_index.
- void addOpAddress(DIELoc &Die, const MCSymbol *Label);
+ void addOpAddress(DIELoc &Die, const MCSymbol *Sym);
/// Add a label delta attribute data and value.
void addLabelDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi,
@@ -201,14 +203,13 @@ public:
void addDIETypeSignature(DIE &Die, uint64_t Signature);
/// Add block data.
- void addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Block);
+ void addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Loc);
/// Add block data.
void addBlock(DIE &Die, dwarf::Attribute Attribute, DIEBlock *Block);
/// Add location information to specified debug information entry.
- void addSourceLine(DIE &Die, unsigned Line, StringRef File,
- StringRef Directory);
+ void addSourceLine(DIE &Die, unsigned Line, const DIFile *File);
void addSourceLine(DIE &Die, const DILocalVariable *V);
void addSourceLine(DIE &Die, const DIGlobalVariable *G);
void addSourceLine(DIE &Die, const DISubprogram *SP);
@@ -259,7 +260,7 @@ public:
bool SkipSPAttributes = false);
/// Find existing DIE or create new DIE for the given type.
- DIE *getOrCreateTypeDIE(const MDNode *N);
+ DIE *getOrCreateTypeDIE(const MDNode *TyNode);
/// Get context owner's DIE.
DIE *getOrCreateContextDIE(const DIScope *Context);
@@ -274,6 +275,10 @@ public:
/// call insertDIE if MD is not null.
DIE &createAndAddDIE(unsigned Tag, DIE &Parent, const DINode *N = nullptr);
+ bool useSegmentedStringOffsetsTable() const {
+ return DD->useSegmentedStringOffsetsTable();
+ }
+
/// Compute the size of a header for this unit, not including the initial
/// length field.
virtual unsigned getHeaderSize() const {
@@ -287,6 +292,12 @@ public:
/// Emit the header for this unit, not including the initial length field.
virtual void emitHeader(bool UseOffsets) = 0;
+ /// Add the DW_AT_str_offsets_base attribute to the unit DIE.
+ void addStringOffsetsStart();
+
+ /// Add the DW_AT_rnglists_base attribute to the unit DIE.
+ void addRnglistsBase();
+
virtual DwarfCompileUnit &getCU() = 0;
void constructTypeDIE(DIE &Buffer, const DICompositeType *CTy);
@@ -300,15 +311,19 @@ public:
const MCSymbol *Label,
const MCSymbol *Sec);
+ /// If the \p File has an MD5 checksum, return it as an MD5Result
+ /// allocated in the MCContext.
+ MD5::MD5Result *getMD5AsBytes(const DIFile *File) const;
+
protected:
~DwarfUnit();
/// Create new static data member DIE.
DIE *getOrCreateStaticMemberDIE(const DIDerivedType *DT);
- /// Look up the source ID with the given directory and source file names. If
- /// none currently exists, create a new ID and insert it in the line table.
- virtual unsigned getOrCreateSourceID(StringRef File, StringRef Directory) = 0;
+ /// Look up the source ID for the given file. If none currently exists,
+ /// create a new ID and insert it in the line table.
+ virtual unsigned getOrCreateSourceID(const DIFile *File) = 0;
/// Look in the DwarfDebug map for the MDNode that corresponds to the
/// reference.
@@ -327,11 +342,11 @@ protected:
private:
void constructTypeDIE(DIE &Buffer, const DIBasicType *BTy);
void constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy);
- void constructTypeDIE(DIE &Buffer, const DISubroutineType *DTy);
+ void constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy);
void constructSubrangeDIE(DIE &Buffer, const DISubrange *SR, DIE *IndexTy);
void constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy);
void constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy);
- void constructMemberDIE(DIE &Buffer, const DIDerivedType *DT);
+ DIE &constructMemberDIE(DIE &Buffer, const DIDerivedType *DT);
void constructTemplateTypeParameterDIE(DIE &Buffer,
const DITemplateTypeParameter *TP);
void constructTemplateValueParameterDIE(DIE &Buffer,
@@ -357,8 +372,9 @@ class DwarfTypeUnit final : public DwarfUnit {
const DIE *Ty;
DwarfCompileUnit &CU;
MCDwarfDwoLineTable *SplitLineTable;
+ bool UsedLineTable = false;
- unsigned getOrCreateSourceID(StringRef File, StringRef Directory) override;
+ unsigned getOrCreateSourceID(const DIFile *File) override;
bool isDwoUnit() const override;
public:
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 3cdab57bca70..65de9d7e65a4 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -20,7 +20,6 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/TargetLoweringObjectFile.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -30,6 +29,7 @@
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/LEB128.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -58,10 +58,10 @@ unsigned EHStreamer::sharedTypeIDs(const LandingPadInfo *L,
/// Compute the actions table and gather the first action index for each landing
/// pad site.
-unsigned EHStreamer::
-computeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
- SmallVectorImpl<ActionEntry> &Actions,
- SmallVectorImpl<unsigned> &FirstActions) {
+void EHStreamer::computeActionsTable(
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ SmallVectorImpl<ActionEntry> &Actions,
+ SmallVectorImpl<unsigned> &FirstActions) {
// The action table follows the call-site table in the LSDA. The individual
// records are of two types:
//
@@ -149,7 +149,7 @@ computeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
FirstAction = SizeActions + SizeSiteActions - SizeAction + 1;
} // else identical - re-use previous FirstAction
- // Information used when created the call-site table. The action record
+ // Information used when creating the call-site table. The action record
// field of the call site record is the offset of the first associated
// action record, relative to the start of the actions table. This value is
// biased by 1 (1 indicating the start of the actions table), and 0
@@ -161,8 +161,6 @@ computeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
PrevLPI = LPI;
}
-
- return SizeActions;
}
/// Return `true' if this is a call to a function marked `nounwind'. Return
@@ -361,55 +359,33 @@ void EHStreamer::emitExceptionTable() {
LandingPads.push_back(&PadInfos[i]);
// Order landing pads lexicographically by type id.
- std::sort(LandingPads.begin(), LandingPads.end(),
- [](const LandingPadInfo *L,
- const LandingPadInfo *R) { return L->TypeIds < R->TypeIds; });
+ llvm::sort(LandingPads.begin(), LandingPads.end(),
+ [](const LandingPadInfo *L,
+ const LandingPadInfo *R) { return L->TypeIds < R->TypeIds; });
// Compute the actions table and gather the first action index for each
// landing pad site.
SmallVector<ActionEntry, 32> Actions;
SmallVector<unsigned, 64> FirstActions;
- unsigned SizeActions =
- computeActionsTable(LandingPads, Actions, FirstActions);
+ computeActionsTable(LandingPads, Actions, FirstActions);
// Compute the call-site table.
SmallVector<CallSiteEntry, 64> CallSites;
computeCallSiteTable(CallSites, LandingPads, FirstActions);
- // Final tallies.
-
- // Call sites.
bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
- bool HaveTTData = IsSJLJ ? (!TypeInfos.empty() || !FilterIds.empty()) : true;
-
- unsigned CallSiteTableLength;
- if (IsSJLJ)
- CallSiteTableLength = 0;
- else {
- unsigned SiteStartSize = 4; // dwarf::DW_EH_PE_udata4
- unsigned SiteLengthSize = 4; // dwarf::DW_EH_PE_udata4
- unsigned LandingPadSize = 4; // dwarf::DW_EH_PE_udata4
- CallSiteTableLength =
- CallSites.size() * (SiteStartSize + SiteLengthSize + LandingPadSize);
- }
-
- for (unsigned i = 0, e = CallSites.size(); i < e; ++i) {
- CallSiteTableLength += getULEB128Size(CallSites[i].Action);
- if (IsSJLJ)
- CallSiteTableLength += getULEB128Size(i);
- }
+ unsigned CallSiteEncoding =
+ IsSJLJ ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_uleb128;
+ bool HaveTTData = !TypeInfos.empty() || !FilterIds.empty();
// Type infos.
MCSection *LSDASection = Asm->getObjFileLowering().getLSDASection();
unsigned TTypeEncoding;
- unsigned TypeFormatSize;
if (!HaveTTData) {
- // For SjLj exceptions, if there is no TypeInfo, then we just explicitly say
- // that we're omitting that bit.
+ // If there is no TypeInfo, then we just explicitly say that we're omitting
+ // that bit.
TTypeEncoding = dwarf::DW_EH_PE_omit;
- // dwarf::DW_EH_PE_absptr
- TypeFormatSize = Asm->getDataLayout().getPointerSize();
} else {
// Okay, we have actual filters or typeinfos to emit. As such, we need to
// pick a type encoding for them. We're about to emit a list of pointers to
@@ -439,7 +415,6 @@ void EHStreamer::emitExceptionTable() {
// in target-independent code.
//
TTypeEncoding = Asm->getObjFileLowering().getTTypeEncoding();
- TypeFormatSize = Asm->GetSizeOfEncodedValue(TTypeEncoding);
}
// Begin the exception table.
@@ -460,64 +435,35 @@ void EHStreamer::emitExceptionTable() {
Asm->EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart");
Asm->EmitEncodingByte(TTypeEncoding, "@TType");
- // The type infos need to be aligned. GCC does this by inserting padding just
- // before the type infos. However, this changes the size of the exception
- // table, so you need to take this into account when you output the exception
- // table size. However, the size is output using a variable length encoding.
- // So by increasing the size by inserting padding, you may increase the number
- // of bytes used for writing the size. If it increases, say by one byte, then
- // you now need to output one less byte of padding to get the type infos
- // aligned. However this decreases the size of the exception table. This
- // changes the value you have to output for the exception table size. Due to
- // the variable length encoding, the number of bytes used for writing the
- // length may decrease. If so, you then have to increase the amount of
- // padding. And so on. If you look carefully at the GCC code you will see that
- // it indeed does this in a loop, going on and on until the values stabilize.
- // We chose another solution: don't output padding inside the table like GCC
- // does, instead output it before the table.
- unsigned SizeTypes = TypeInfos.size() * TypeFormatSize;
- unsigned CallSiteTableLengthSize = getULEB128Size(CallSiteTableLength);
- unsigned TTypeBaseOffset =
- sizeof(int8_t) + // Call site format
- CallSiteTableLengthSize + // Call site table length size
- CallSiteTableLength + // Call site table length
- SizeActions + // Actions size
- SizeTypes;
- unsigned TTypeBaseOffsetSize = getULEB128Size(TTypeBaseOffset);
- unsigned TotalSize =
- sizeof(int8_t) + // LPStart format
- sizeof(int8_t) + // TType format
- (HaveTTData ? TTypeBaseOffsetSize : 0) + // TType base offset size
- TTypeBaseOffset; // TType base offset
- unsigned PadBytes = (4 - TotalSize) & 3;
-
+ MCSymbol *TTBaseLabel = nullptr;
if (HaveTTData) {
- // Account for any extra padding that will be added to the call site table
- // length.
- Asm->EmitPaddedULEB128(TTypeBaseOffset, TTypeBaseOffsetSize + PadBytes,
- "@TType base offset");
- PadBytes = 0;
+ // N.B.: There is a dependency loop between the size of the TTBase uleb128
+ // here and the amount of padding before the aligned type table. The
+ // assembler must sometimes pad this uleb128 or insert extra padding before
+ // the type table. See PR35809 or GNU as bug 4029.
+ MCSymbol *TTBaseRefLabel = Asm->createTempSymbol("ttbaseref");
+ TTBaseLabel = Asm->createTempSymbol("ttbase");
+ Asm->EmitLabelDifferenceAsULEB128(TTBaseLabel, TTBaseRefLabel);
+ Asm->OutStreamer->EmitLabel(TTBaseRefLabel);
}
bool VerboseAsm = Asm->OutStreamer->isVerboseAsm();
+ // Emit the landing pad call site table.
+ MCSymbol *CstBeginLabel = Asm->createTempSymbol("cst_begin");
+ MCSymbol *CstEndLabel = Asm->createTempSymbol("cst_end");
+ Asm->EmitEncodingByte(CallSiteEncoding, "Call site");
+ Asm->EmitLabelDifferenceAsULEB128(CstEndLabel, CstBeginLabel);
+ Asm->OutStreamer->EmitLabel(CstBeginLabel);
+
// SjLj Exception handling
if (IsSJLJ) {
- Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
-
- // Add extra padding if it wasn't added to the TType base offset.
- Asm->EmitPaddedULEB128(CallSiteTableLength,
- CallSiteTableLengthSize + PadBytes,
- "Call site table length");
-
- // Emit the landing pad site information.
unsigned idx = 0;
for (SmallVectorImpl<CallSiteEntry>::const_iterator
I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) {
const CallSiteEntry &S = *I;
- // Offset of the landing pad, counted in 16-byte bundles relative to the
- // @LPStart address.
+ // Index of the call site entry.
if (VerboseAsm) {
Asm->OutStreamer->AddComment(">> Call Site " + Twine(idx) + " <<");
Asm->OutStreamer->AddComment(" On exception at call site "+Twine(idx));
@@ -557,14 +503,6 @@ void EHStreamer::emitExceptionTable() {
// A missing entry in the call-site table indicates that a call is not
// supposed to throw.
- // Emit the landing pad call site table.
- Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
-
- // Add extra padding if it wasn't added to the TType base offset.
- Asm->EmitPaddedULEB128(CallSiteTableLength,
- CallSiteTableLengthSize + PadBytes,
- "Call site table length");
-
unsigned Entry = 0;
for (SmallVectorImpl<CallSiteEntry>::const_iterator
I = CallSites.begin(), E = CallSites.end(); I != E; ++I) {
@@ -579,29 +517,27 @@ void EHStreamer::emitExceptionTable() {
if (!EndLabel)
EndLabel = Asm->getFunctionEnd();
- // Offset of the call site relative to the previous call site, counted in
- // number of 16-byte bundles. The first call site is counted relative to
- // the start of the procedure fragment.
+ // Offset of the call site relative to the start of the procedure.
if (VerboseAsm)
Asm->OutStreamer->AddComment(">> Call Site " + Twine(++Entry) + " <<");
- Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4);
+ Asm->EmitLabelDifferenceAsULEB128(BeginLabel, EHFuncBeginSym);
if (VerboseAsm)
Asm->OutStreamer->AddComment(Twine(" Call between ") +
BeginLabel->getName() + " and " +
EndLabel->getName());
- Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
+ Asm->EmitLabelDifferenceAsULEB128(EndLabel, BeginLabel);
- // Offset of the landing pad, counted in 16-byte bundles relative to the
- // @LPStart address.
+ // Offset of the landing pad relative to the start of the procedure.
if (!S.LPad) {
if (VerboseAsm)
Asm->OutStreamer->AddComment(" has no landing pad");
- Asm->OutStreamer->EmitIntValue(0, 4/*size*/);
+ Asm->EmitULEB128(0);
} else {
if (VerboseAsm)
Asm->OutStreamer->AddComment(Twine(" jumps to ") +
S.LPad->LandingPadLabel->getName());
- Asm->EmitLabelDifference(S.LPad->LandingPadLabel, EHFuncBeginSym, 4);
+ Asm->EmitLabelDifferenceAsULEB128(S.LPad->LandingPadLabel,
+ EHFuncBeginSym);
}
// Offset of the first associated action record, relative to the start of
@@ -617,6 +553,7 @@ void EHStreamer::emitExceptionTable() {
Asm->EmitULEB128(S.Action);
}
}
+ Asm->OutStreamer->EmitLabel(CstEndLabel);
// Emit the Action Table.
int Entry = 0;
@@ -660,12 +597,15 @@ void EHStreamer::emitExceptionTable() {
Asm->EmitSLEB128(Action.NextAction);
}
- emitTypeInfos(TTypeEncoding);
+ if (HaveTTData) {
+ Asm->EmitAlignment(2);
+ emitTypeInfos(TTypeEncoding, TTBaseLabel);
+ }
Asm->EmitAlignment(2);
}
-void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) {
+void EHStreamer::emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) {
const MachineFunction *MF = Asm->MF;
const std::vector<const GlobalValue *> &TypeInfos = MF->getTypeInfos();
const std::vector<unsigned> &FilterIds = MF->getFilterIds();
@@ -687,6 +627,8 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) {
Asm->EmitTTypeReference(GV, TTypeEncoding);
}
+ Asm->OutStreamer->EmitLabel(TTBaseLabel);
+
// Emit the Exception Specifications.
if (VerboseAsm && !FilterIds.empty()) {
Asm->OutStreamer->AddComment(">> Filter TypeInfos <<");
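
The rewrite above drops the hand-computed CallSiteTableLength/TTypeBaseOffset arithmetic and instead brackets the call-site and type tables with temporary labels, emitting their sizes as label differences in ULEB128 form so the assembler resolves the padding/size feedback loop described in the PR35809 note. For reference, a minimal ULEB128 encoder sketch (not the patch's code) showing why those sizes are variable-length in the first place:

    #include <cstdint>
    #include <vector>

    // Encode an unsigned value as ULEB128: 7 payload bits per byte, high bit
    // set on every byte except the last.
    std::vector<uint8_t> encodeULEB128(uint64_t Value) {
      std::vector<uint8_t> Bytes;
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;
        if (Value != 0)
          Byte |= 0x80; // more bytes follow
        Bytes.push_back(Byte);
      } while (Value != 0);
      return Bytes; // e.g. 127 -> {0x7f}, 128 -> {0x80, 0x01}
    }

Because the byte count depends on the value being encoded, writing a table's length before the table itself requires either the iterative padding dance the deleted comment described or label differences that the assembler fixes up; the patch takes the latter route.
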
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
index 7962b761d8de..b89421a1e067 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
@@ -72,9 +72,9 @@ protected:
/// Compute the actions table and gather the first action index for each
/// landing pad site.
- unsigned computeActionsTable(const SmallVectorImpl<const LandingPadInfo*>&LPs,
- SmallVectorImpl<ActionEntry> &Actions,
- SmallVectorImpl<unsigned> &FirstActions);
+ void computeActionsTable(const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ SmallVectorImpl<ActionEntry> &Actions,
+ SmallVectorImpl<unsigned> &FirstActions);
void computePadMap(const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
RangeMapType &PadMap);
@@ -86,7 +86,7 @@ protected:
/// no entry and must not be contained in the try-range of any entry - they
/// form gaps in the table. Entries must be ordered by try-range address.
void computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
- const SmallVectorImpl<const LandingPadInfo *> &LPs,
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
const SmallVectorImpl<unsigned> &FirstActions);
/// Emit landing pads and actions.
@@ -110,9 +110,9 @@ protected:
  /// catches in the function. This table is reverse-indexed, base 1.
void emitExceptionTable();
- virtual void emitTypeInfos(unsigned TTypeEncoding);
+ virtual void emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel);
- // Helpers for for identifying what kind of clause an EH typeid or selector
+ // Helpers for identifying what kind of clause an EH typeid or selector
// corresponds to. Negative selectors are for filter clauses, the zero
// selector is for cleanups, and positive selectors are for catch clauses.
static bool isFilterEHSelector(int Selector) { return Selector < 0; }
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
index e459c02c9a6e..49cc376fcc98 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -19,7 +19,6 @@
#include "llvm/CodeGen/GCMetadataPrinter.h"
#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/GCs.h"
-#include "llvm/CodeGen/TargetLoweringObjectFile.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
@@ -27,6 +26,7 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
using namespace llvm;
@@ -77,7 +77,7 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
// Emit PointCount.
OS.AddComment("safe point count");
- AP.EmitInt16(MD.size());
+ AP.emitInt16(MD.size());
// And each safe point...
for (GCFunctionInfo::iterator PI = MD.begin(), PE = MD.end(); PI != PE;
@@ -94,7 +94,7 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
// Emit the stack frame size.
OS.AddComment("stack frame size (in words)");
- AP.EmitInt16(MD.getFrameSize() / IntPtrSize);
+ AP.emitInt16(MD.getFrameSize() / IntPtrSize);
// Emit stack arity, i.e. the number of stacked arguments.
unsigned RegisteredArgs = IntPtrSize == 4 ? 5 : 6;
@@ -102,11 +102,11 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
? MD.getFunction().arg_size() - RegisteredArgs
: 0;
OS.AddComment("stack arity");
- AP.EmitInt16(StackArity);
+ AP.emitInt16(StackArity);
// Emit the number of live roots in the function.
OS.AddComment("live root count");
- AP.EmitInt16(MD.live_size(PI));
+ AP.emitInt16(MD.live_size(PI));
// And for each live root...
for (GCFunctionInfo::live_iterator LI = MD.live_begin(PI),
@@ -114,7 +114,7 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
LI != LE; ++LI) {
// Emit live root's offset within the stack frame.
OS.AddComment("stack index (offset / wordsize)");
- AP.EmitInt16(LI->StackOffset / IntPtrSize);
+ AP.emitInt16(LI->StackOffset / IntPtrSize);
}
}
}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index e0cc241dd23f..59a57ed30d10 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -18,7 +18,6 @@
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
#include "llvm/CodeGen/GCs.h"
-#include "llvm/CodeGen/TargetLoweringObjectFile.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Mangler.h"
@@ -27,6 +26,7 @@
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include <cctype>
#include <cstddef>
#include <cstdint>
@@ -129,7 +129,7 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
// Very rude!
report_fatal_error(" Too much descriptor for ocaml GC");
}
- AP.EmitInt16(NumDescriptors);
+ AP.emitInt16(NumDescriptors);
AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
for (GCModuleInfo::FuncInfoVec::iterator I = Info.funcinfo_begin(),
@@ -166,8 +166,8 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
}
AP.OutStreamer->EmitSymbolValue(J->Label, IntPtrSize);
- AP.EmitInt16(FrameSize);
- AP.EmitInt16(LiveCount);
+ AP.emitInt16(FrameSize);
+ AP.emitInt16(LiveCount);
for (GCFunctionInfo::live_iterator K = FI.live_begin(J),
KE = FI.live_end(J);
@@ -178,7 +178,7 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
"GC root stack offset is outside of fixed stack frame and out "
"of range for ocaml GC!");
}
- AP.EmitInt16(K->StackOffset);
+ AP.emitInt16(K->StackOffset);
}
AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
new file mode 100644
index 000000000000..18d37caf57ee
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
@@ -0,0 +1,45 @@
+//===-- CodeGen/AsmPrinter/WinCFGuard.cpp - Control Flow Guard Impl ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing the Windows Control Flow Guard
+// function ID table into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "WinCFGuard.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCStreamer.h"
+
+#include <vector>
+
+using namespace llvm;
+
+WinCFGuard::WinCFGuard(AsmPrinter *A) : AsmPrinterHandler(), Asm(A) {}
+
+WinCFGuard::~WinCFGuard() {}
+
+void WinCFGuard::endModule() {
+ const Module *M = Asm->MMI->getModule();
+ std::vector<const Function *> Functions;
+ for (const Function &F : *M)
+ if (F.hasAddressTaken())
+ Functions.push_back(&F);
+ if (Functions.empty())
+ return;
+ auto &OS = *Asm->OutStreamer;
+ OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGFIDsSection());
+ for (const Function *F : Functions)
+ OS.EmitCOFFSymbolIndex(Asm->getSymbol(F));
+}
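
The new endModule() above walks the module, collects every address-taken function, and emits its COFF symbol index into the Control Flow Guard function ID section. A hypothetical source-level example (not from this patch) of what does and does not end up in that table under the hasAddressTaken() filter:

    #include <cstdio>

    static void calledDirectly() { std::puts("direct"); }     // address never taken
    static void calledIndirectly() { std::puts("indirect"); } // address taken below

    int main() {
      calledDirectly();                  // direct call only: no guard table entry
      void (*FP)() = &calledIndirectly;  // taking the address marks the function
      FP();                              // indirect call site that CF Guard checks
      return 0;
    }

Only calledIndirectly would be listed, which matches the intent of Control Flow Guard: valid indirect-call targets are enumerated, and anything else is rejected at run time.
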
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h
new file mode 100644
index 000000000000..124e8f04bfad
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h
@@ -0,0 +1,54 @@
+//===-- WinCFGuard.h - Windows Control Flow Guard Handling ----*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing the Windows Control Flow Guard
+// function ID table into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_WINCFGUARD_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_WINCFGUARD_H
+
+#include "AsmPrinterHandler.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+class LLVM_LIBRARY_VISIBILITY WinCFGuard : public AsmPrinterHandler {
+ /// Target of directive emission.
+ AsmPrinter *Asm;
+
+public:
+ WinCFGuard(AsmPrinter *A);
+ ~WinCFGuard() override;
+
+ void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {}
+
+ /// Emit the Control Flow Guard function ID table
+ void endModule() override;
+
+ /// Gather pre-function debug information.
+ /// Every beginFunction(MF) call should be followed by an endFunction(MF)
+ /// call.
+ void beginFunction(const MachineFunction *MF) override {}
+
+ /// Gather post-function debug information.
+ /// Please note that some AsmPrinter implementations may not call
+ /// beginFunction at all.
+ void endFunction(const MachineFunction *MF) override {}
+
+ /// Process beginning of an instruction.
+ void beginInstruction(const MachineInstr *MI) override {}
+
+ /// Process end of an instruction.
+ void endInstruction() override {}
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
index a6a8e84a949f..eff73a58d8d2 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -21,7 +21,6 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetLowering.h"
-#include "llvm/CodeGen/TargetLoweringObjectFile.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DataLayout.h"
@@ -35,6 +34,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h
index 371061c2c2ec..eed3c4453ffc 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h
@@ -100,7 +100,7 @@ public:
/// Gather and emit post-function exception information.
void endFunction(const MachineFunction *) override;
- /// \brief Emit target-specific EH funclet machinery.
+ /// Emit target-specific EH funclet machinery.
void beginFunclet(const MachineBasicBlock &MBB, MCSymbol *Sym) override;
void endFunclet() override;
};
diff --git a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 7042bc997223..f2615edaece2 100644
--- a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -98,8 +98,8 @@ namespace {
CreateCmpXchgInstFun CreateCmpXchg);
bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
- bool isIdempotentRMW(AtomicRMWInst *AI);
- bool simplifyIdempotentRMW(AtomicRMWInst *AI);
+ bool isIdempotentRMW(AtomicRMWInst *RMWI);
+ bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, unsigned Align,
Value *PointerOperand, Value *ValueOperand,
@@ -379,8 +379,8 @@ LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
NewLI->setAlignment(LI->getAlignment());
NewLI->setVolatile(LI->isVolatile());
NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
- DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
-
+ LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
+
Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
LI->replaceAllUsesWith(NewVal);
LI->eraseFromParent();
@@ -462,7 +462,7 @@ StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
NewSI->setAlignment(SI->getAlignment());
NewSI->setVolatile(SI->isVolatile());
NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
- DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
+ LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
SI->eraseFromParent();
return NewSI;
}
@@ -943,7 +943,7 @@ AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *
CI->getSyncScopeID());
NewCI->setVolatile(CI->isVolatile());
NewCI->setWeak(CI->isWeak());
- DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
+ LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
Value *Succ = Builder.CreateExtractValue(NewCI, 1);
diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
index 7f358a679366..c7a0c6457164 100644
--- a/contrib/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
@@ -152,7 +152,7 @@ BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist,
void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
assert(MBB->pred_empty() && "MBB must be dead!");
- DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
+ LLVM_DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
MachineFunction *MF = MBB->getParent();
// drop all successors.
@@ -164,7 +164,7 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
// Remove the block.
MF->erase(MBB);
- FuncletMembership.erase(MBB);
+ EHScopeMembership.erase(MBB);
if (MLI)
MLI->removeBlock(MBB);
}
@@ -199,8 +199,8 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
MadeChange |= MBB.CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
}
- // Recalculate funclet membership.
- FuncletMembership = getFuncletMembership(MF);
+ // Recalculate EH scope membership.
+ EHScopeMembership = getEHScopeMembership(MF);
bool MadeChangeThisIteration = true;
while (MadeChangeThisIteration) {
@@ -296,6 +296,11 @@ static unsigned HashEndOfMBB(const MachineBasicBlock &MBB) {
return HashMachineInstr(*I);
}
+/// Whether MI should be counted as an instruction when calculating common tail.
+static bool countsAsInstruction(const MachineInstr &MI) {
+ return !(MI.isDebugValue() || MI.isCFIInstruction());
+}
+
/// ComputeCommonTailLength - Given two machine basic blocks, compute the number
/// of instructions they actually have in common together at their end. Return
/// iterators for the first shared instruction in each block.
@@ -310,26 +315,27 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
while (I1 != MBB1->begin() && I2 != MBB2->begin()) {
--I1; --I2;
// Skip debugging pseudos; necessary to avoid changing the code.
- while (I1->isDebugValue()) {
+ while (!countsAsInstruction(*I1)) {
if (I1==MBB1->begin()) {
- while (I2->isDebugValue()) {
- if (I2==MBB2->begin())
+ while (!countsAsInstruction(*I2)) {
+ if (I2==MBB2->begin()) {
// I1==DBG at begin; I2==DBG at begin
- return TailLen;
+ goto SkipTopCFIAndReturn;
+ }
--I2;
}
++I2;
// I1==DBG at begin; I2==non-DBG, or first of DBGs not at begin
- return TailLen;
+ goto SkipTopCFIAndReturn;
}
--I1;
}
// I1==first (untested) non-DBG preceding known match
- while (I2->isDebugValue()) {
+ while (!countsAsInstruction(*I2)) {
if (I2==MBB2->begin()) {
++I1;
// I1==non-DBG, or first of DBGs not at begin; I2==DBG at begin
- return TailLen;
+ goto SkipTopCFIAndReturn;
}
--I2;
}
@@ -352,7 +358,7 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
// I1==MBB1->begin() work as expected.)
if (I1 == MBB1->begin() && I2 != MBB2->begin()) {
--I2;
- while (I2->isDebugValue()) {
+ while (I2->isDebugInstr()) {
if (I2 == MBB2->begin())
return TailLen;
--I2;
@@ -361,13 +367,44 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
}
if (I2 == MBB2->begin() && I1 != MBB1->begin()) {
--I1;
- while (I1->isDebugValue()) {
+ while (I1->isDebugInstr()) {
if (I1 == MBB1->begin())
return TailLen;
--I1;
}
++I1;
}
+
+SkipTopCFIAndReturn:
+ // Ensure that I1 and I2 do not point to a CFI_INSTRUCTION. This can happen if
+ // I1 and I2 are non-identical when compared and then one or both of them ends
+ // up pointing to a CFI instruction after being incremented. For example:
+ /*
+ BB1:
+ ...
+ INSTRUCTION_A
+ ADD32ri8 <- last common instruction
+ ...
+ BB2:
+ ...
+ INSTRUCTION_B
+ CFI_INSTRUCTION
+ ADD32ri8 <- last common instruction
+ ...
+ */
+ // When INSTRUCTION_A and INSTRUCTION_B are compared as not equal, after
+ // incrementing the iterators, I1 will point to ADD, however I2 will point to
+ // the CFI instruction. Later on, this leads to BB2 being 'hacked off' at the
+ // wrong place (in ReplaceTailWithBranchTo()) which results in losing this CFI
+ // instruction.
+ while (I1 != MBB1->end() && I1->isCFIInstruction()) {
+ ++I1;
+ }
+
+ while (I2 != MBB2->end() && I2->isCFIInstruction()) {
+ ++I2;
+ }
+
return TailLen;
}
@@ -438,11 +475,11 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
if (UpdateLiveIns)
computeAndAddLiveIns(LiveRegs, *NewMBB);
- // Add the new block to the funclet.
- const auto &FuncletI = FuncletMembership.find(&CurMBB);
- if (FuncletI != FuncletMembership.end()) {
- auto n = FuncletI->second;
- FuncletMembership[NewMBB] = n;
+ // Add the new block to the EH scope.
+ const auto &EHScopeI = EHScopeMembership.find(&CurMBB);
+ if (EHScopeI != EHScopeMembership.end()) {
+ auto n = EHScopeI->second;
+ EHScopeMembership[NewMBB] = n;
}
return NewMBB;
@@ -454,7 +491,7 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator E) {
unsigned Time = 0;
for (; I != E; ++I) {
- if (I->isDebugValue())
+ if (!countsAsInstruction(*I))
continue;
if (I->isCall())
Time += 10;
@@ -589,7 +626,7 @@ static bool blockEndsInUnreachable(const MachineBasicBlock *MBB) {
/// SuccBB A common successor of MBB1, MBB2 which are in a canonical form
/// relative to SuccBB
/// PredBB The layout predecessor of SuccBB, if any.
-/// FuncletMembership map from block to funclet #.
+/// EHScopeMembership map from block to EH scope #.
/// AfterPlacement True if we are merging blocks after layout. Stricter
/// thresholds apply to prevent undoing tail-duplication.
static bool
@@ -598,24 +635,24 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
MachineBasicBlock::iterator &I1,
MachineBasicBlock::iterator &I2, MachineBasicBlock *SuccBB,
MachineBasicBlock *PredBB,
- DenseMap<const MachineBasicBlock *, int> &FuncletMembership,
+ DenseMap<const MachineBasicBlock *, int> &EHScopeMembership,
bool AfterPlacement) {
- // It is never profitable to tail-merge blocks from two different funclets.
- if (!FuncletMembership.empty()) {
- auto Funclet1 = FuncletMembership.find(MBB1);
- assert(Funclet1 != FuncletMembership.end());
- auto Funclet2 = FuncletMembership.find(MBB2);
- assert(Funclet2 != FuncletMembership.end());
- if (Funclet1->second != Funclet2->second)
+ // It is never profitable to tail-merge blocks from two different EH scopes.
+ if (!EHScopeMembership.empty()) {
+ auto EHScope1 = EHScopeMembership.find(MBB1);
+ assert(EHScope1 != EHScopeMembership.end());
+ auto EHScope2 = EHScopeMembership.find(MBB2);
+ assert(EHScope2 != EHScopeMembership.end());
+ if (EHScope1->second != EHScope2->second)
return false;
}
CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2);
if (CommonTailLen == 0)
return false;
- DEBUG(dbgs() << "Common tail length of " << printMBBReference(*MBB1)
- << " and " << printMBBReference(*MBB2) << " is " << CommonTailLen
- << '\n');
+ LLVM_DEBUG(dbgs() << "Common tail length of " << printMBBReference(*MBB1)
+ << " and " << printMBBReference(*MBB2) << " is "
+ << CommonTailLen << '\n');
// It's almost always profitable to merge any number of non-terminator
// instructions with the block that falls through into the common successor.
@@ -706,7 +743,7 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
MinCommonTailLength,
CommonTailLen, TrialBBI1, TrialBBI2,
SuccBB, PredBB,
- FuncletMembership,
+ EHScopeMembership,
AfterBlockPlacement)) {
if (CommonTailLen > maxCommonTailLength) {
SameTails.clear();
@@ -770,8 +807,8 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
SameTails[commonTailIndex].getTailStartPos();
MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
- DEBUG(dbgs() << "\nSplitting " << printMBBReference(*MBB) << ", size "
- << maxCommonTailLength);
+ LLVM_DEBUG(dbgs() << "\nSplitting " << printMBBReference(*MBB) << ", size "
+ << maxCommonTailLength);
// If the split block unconditionally falls-thru to SuccBB, it will be
// merged. In control flow terms it should then take SuccBB's name. e.g. If
@@ -780,7 +817,7 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
SuccBB->getBasicBlock() : MBB->getBasicBlock();
MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI, BB);
if (!newMBB) {
- DEBUG(dbgs() << "... failed!");
+ LLVM_DEBUG(dbgs() << "... failed!");
return false;
}
@@ -814,12 +851,12 @@ mergeOperations(MachineBasicBlock::iterator MBBIStartPos,
assert(MBBI != MBBIE && "Reached BB end within common tail length!");
(void)MBBIE;
- if (MBBI->isDebugValue()) {
+ if (!countsAsInstruction(*MBBI)) {
++MBBI;
continue;
}
- while ((MBBICommon != MBBIECommon) && MBBICommon->isDebugValue())
+ while ((MBBICommon != MBBIECommon) && !countsAsInstruction(*MBBICommon))
++MBBICommon;
assert(MBBICommon != MBBIECommon &&
@@ -859,7 +896,7 @@ void BranchFolder::mergeCommonTails(unsigned commonTailIndex) {
}
for (auto &MI : *MBB) {
- if (MI.isDebugValue())
+ if (!countsAsInstruction(MI))
continue;
DebugLoc DL = MI.getDebugLoc();
for (unsigned int i = 0 ; i < NextCommonInsts.size() ; i++) {
@@ -869,7 +906,7 @@ void BranchFolder::mergeCommonTails(unsigned commonTailIndex) {
auto &Pos = NextCommonInsts[i];
assert(Pos != SameTails[i].getBlock()->end() &&
"Reached BB end within common tail");
- while (Pos->isDebugValue()) {
+ while (!countsAsInstruction(*Pos)) {
++Pos;
assert(Pos != SameTails[i].getBlock()->end() &&
"Reached BB end within common tail");
@@ -884,11 +921,12 @@ void BranchFolder::mergeCommonTails(unsigned commonTailIndex) {
if (UpdateLiveIns) {
LivePhysRegs NewLiveIns(*TRI);
computeLiveIns(NewLiveIns, *MBB);
+ LiveRegs.init(*TRI);
// The flag merging may lead to some register uses no longer using the
// <undef> flag, add IMPLICIT_DEFs in the predecessors as necessary.
for (MachineBasicBlock *Pred : MBB->predecessors()) {
- LiveRegs.init(*TRI);
+ LiveRegs.clear();
LiveRegs.addLiveOuts(*Pred);
MachineBasicBlock::iterator InsertBefore = Pred->getFirstTerminator();
for (unsigned Reg : NewLiveIns) {
@@ -919,18 +957,19 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
unsigned MinCommonTailLength) {
bool MadeChange = false;
- DEBUG(dbgs() << "\nTryTailMergeBlocks: ";
- for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) dbgs()
- << printMBBReference(*MergePotentials[i].getBlock())
- << (i == e - 1 ? "" : ", ");
- dbgs() << "\n"; if (SuccBB) {
- dbgs() << " with successor " << printMBBReference(*SuccBB) << '\n';
- if (PredBB)
- dbgs() << " which has fall-through from "
- << printMBBReference(*PredBB) << "\n";
- } dbgs() << "Looking for common tails of at least "
- << MinCommonTailLength << " instruction"
- << (MinCommonTailLength == 1 ? "" : "s") << '\n';);
+ LLVM_DEBUG(
+ dbgs() << "\nTryTailMergeBlocks: ";
+ for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) dbgs()
+ << printMBBReference(*MergePotentials[i].getBlock())
+ << (i == e - 1 ? "" : ", ");
+ dbgs() << "\n"; if (SuccBB) {
+ dbgs() << " with successor " << printMBBReference(*SuccBB) << '\n';
+ if (PredBB)
+ dbgs() << " which has fall-through from "
+ << printMBBReference(*PredBB) << "\n";
+ } dbgs() << "Looking for common tails of at least "
+ << MinCommonTailLength << " instruction"
+ << (MinCommonTailLength == 1 ? "" : "s") << '\n';);
// Sort by hash value so that blocks with identical end sequences sort
// together.
@@ -1010,19 +1049,19 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
// MBB is common tail. Adjust all other BB's to jump to this one.
// Traversal must be forwards so erases work.
- DEBUG(dbgs() << "\nUsing common tail in " << printMBBReference(*MBB)
- << " for ");
+ LLVM_DEBUG(dbgs() << "\nUsing common tail in " << printMBBReference(*MBB)
+ << " for ");
for (unsigned int i=0, e = SameTails.size(); i != e; ++i) {
if (commonTailIndex == i)
continue;
- DEBUG(dbgs() << printMBBReference(*SameTails[i].getBlock())
- << (i == e - 1 ? "" : ", "));
+ LLVM_DEBUG(dbgs() << printMBBReference(*SameTails[i].getBlock())
+ << (i == e - 1 ? "" : ", "));
// Hack the end off BB i, making it jump to BB commonTailIndex instead.
replaceTailWithBranchTo(SameTails[i].getTailStartPos(), *MBB);
// BB i is no longer a predecessor of SuccBB; remove it from the worklist.
MergePotentials.erase(SameTails[i].getMPIter());
}
- DEBUG(dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "\n");
// We leave commonTailIndex in the worklist in case there are other blocks
// that match it with a smaller number of instructions.
MadeChange = true;
@@ -1254,8 +1293,8 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
// Make sure blocks are numbered in order
MF.RenumberBlocks();
- // Renumbering blocks alters funclet membership, recalculate it.
- FuncletMembership = getFuncletMembership(MF);
+ // Renumbering blocks alters EH scope membership, recalculate it.
+ EHScopeMembership = getEHScopeMembership(MF);
for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end();
I != E; ) {
@@ -1319,6 +1358,53 @@ static DebugLoc getBranchDebugLoc(MachineBasicBlock &MBB) {
return DebugLoc();
}
+static void copyDebugInfoToPredecessor(const TargetInstrInfo *TII,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock &PredMBB) {
+ auto InsertBefore = PredMBB.getFirstTerminator();
+ for (MachineInstr &MI : MBB.instrs())
+ if (MI.isDebugValue()) {
+ TII->duplicate(PredMBB, InsertBefore, MI);
+ LLVM_DEBUG(dbgs() << "Copied debug value from empty block to pred: "
+ << MI);
+ }
+}
+
+static void copyDebugInfoToSuccessor(const TargetInstrInfo *TII,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock &SuccMBB) {
+ auto InsertBefore = SuccMBB.SkipPHIsAndLabels(SuccMBB.begin());
+ for (MachineInstr &MI : MBB.instrs())
+ if (MI.isDebugValue()) {
+ TII->duplicate(SuccMBB, InsertBefore, MI);
+ LLVM_DEBUG(dbgs() << "Copied debug value from empty block to succ: "
+ << MI);
+ }
+}
+
+// Try to salvage DBG_VALUE instructions from an otherwise empty block. If such
+// a basic block is removed we would lose the debug information unless we have
+// copied the information to a predecessor/successor.
+//
+// TODO: This function only handles some simple cases. An alternative would be
+// to run a heavier analysis, such as the LiveDebugValues pass, before we do
+// branch folding.
+static void salvageDebugInfoFromEmptyBlock(const TargetInstrInfo *TII,
+ MachineBasicBlock &MBB) {
+ assert(IsEmptyBlock(&MBB) && "Expected an empty block (except debug info).");
+ // If this MBB is the only predecessor of a successor it is legal to copy
+ // DBG_VALUE instructions to the beginning of the successor.
+ for (MachineBasicBlock *SuccBB : MBB.successors())
+ if (SuccBB->pred_size() == 1)
+ copyDebugInfoToSuccessor(TII, MBB, *SuccBB);
+ // If this MBB is the only successor of a predecessor it is legal to copy the
+ // DBG_VALUE instructions to the end of the predecessor (just before the
+ // terminators, assuming that the terminator isn't affecting the DBG_VALUE).
+ for (MachineBasicBlock *PredBB : MBB.predecessors())
+ if (PredBB->succ_size() == 1)
+ copyDebugInfoToPredecessor(TII, MBB, *PredBB);
+}
+
bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
bool MadeChange = false;
MachineFunction &MF = *MBB->getParent();
@@ -1327,14 +1413,14 @@ ReoptimizeBlock:
MachineFunction::iterator FallThrough = MBB->getIterator();
++FallThrough;
- // Make sure MBB and FallThrough belong to the same funclet.
- bool SameFunclet = true;
- if (!FuncletMembership.empty() && FallThrough != MF.end()) {
- auto MBBFunclet = FuncletMembership.find(MBB);
- assert(MBBFunclet != FuncletMembership.end());
- auto FallThroughFunclet = FuncletMembership.find(&*FallThrough);
- assert(FallThroughFunclet != FuncletMembership.end());
- SameFunclet = MBBFunclet->second == FallThroughFunclet->second;
+ // Make sure MBB and FallThrough belong to the same EH scope.
+ bool SameEHScope = true;
+ if (!EHScopeMembership.empty() && FallThrough != MF.end()) {
+ auto MBBEHScope = EHScopeMembership.find(MBB);
+ assert(MBBEHScope != EHScopeMembership.end());
+ auto FallThroughEHScope = EHScopeMembership.find(&*FallThrough);
+ assert(FallThroughEHScope != EHScopeMembership.end());
+ SameEHScope = MBBEHScope->second == FallThroughEHScope->second;
}
// If this block is empty, make everyone use its fall-through, not the block
@@ -1342,7 +1428,8 @@ ReoptimizeBlock:
// points to this block. Blocks with their addresses taken shouldn't be
// optimized away.
if (IsEmptyBlock(MBB) && !MBB->isEHPad() && !MBB->hasAddressTaken() &&
- SameFunclet) {
+ SameEHScope) {
+ salvageDebugInfoFromEmptyBlock(TII, *MBB);
// Dead block? Leave for cleanup later.
if (MBB->pred_empty()) return MadeChange;
@@ -1406,8 +1493,8 @@ ReoptimizeBlock:
if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 &&
PrevBB.succ_size() == 1 &&
!MBB->hasAddressTaken() && !MBB->isEHPad()) {
- DEBUG(dbgs() << "\nMerging into block: " << PrevBB
- << "From MBB: " << *MBB);
+ LLVM_DEBUG(dbgs() << "\nMerging into block: " << PrevBB
+ << "From MBB: " << *MBB);
// Remove redundant DBG_VALUEs first.
if (PrevBB.begin() != PrevBB.end()) {
MachineBasicBlock::iterator PrevBBIter = PrevBB.end();
@@ -1416,7 +1503,7 @@ ReoptimizeBlock:
// Check if DBG_VALUE at the end of PrevBB is identical to the
// DBG_VALUE at the beginning of MBB.
while (PrevBBIter != PrevBB.begin() && MBBIter != MBB->end()
- && PrevBBIter->isDebugValue() && MBBIter->isDebugValue()) {
+ && PrevBBIter->isDebugInstr() && MBBIter->isDebugInstr()) {
if (!MBBIter->isIdenticalTo(*PrevBBIter))
break;
MachineInstr &DuplicateDbg = *MBBIter;
@@ -1493,8 +1580,8 @@ ReoptimizeBlock:
// Reverse the branch so we will fall through on the previous true cond.
SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
if (!TII->reverseBranchCondition(NewPriorCond)) {
- DEBUG(dbgs() << "\nMoving MBB: " << *MBB
- << "To make fallthrough to: " << *PriorTBB << "\n");
+ LLVM_DEBUG(dbgs() << "\nMoving MBB: " << *MBB
+ << "To make fallthrough to: " << *PriorTBB << "\n");
DebugLoc dl = getBranchDebugLoc(PrevBB);
TII->removeBranch(PrevBB);
@@ -1829,8 +1916,12 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
if (Uses.empty())
return Loc;
+ // If the terminator is the only instruction in the block and Uses is not
+ // empty (or we would have returned above), we can still safely hoist
+ // instructions just before the terminator as long as the Defs/Uses are not
+ // violated (which is checked in HoistCommonCodeInSuccs).
if (Loc == MBB->begin())
- return MBB->end();
+ return Loc;
// The terminator is probably a conditional branch, try not to separate the
// branch from condition setting instruction.
@@ -1917,7 +2008,6 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
return false;
bool HasDups = false;
- SmallVector<unsigned, 4> LocalDefs, LocalKills;
SmallSet<unsigned, 4> ActiveDefsSet, AllDefsSet;
MachineBasicBlock::iterator TIB = TBB->begin();
MachineBasicBlock::iterator FIB = FBB->begin();
@@ -2000,7 +2090,6 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
if (!Reg)
continue;
if (!AllDefsSet.count(Reg)) {
- LocalKills.push_back(Reg);
continue;
}
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
@@ -2018,7 +2107,6 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
unsigned Reg = MO.getReg();
if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg))
continue;
- LocalDefs.push_back(Reg);
addRegAndItsAliases(Reg, TRI, ActiveDefsSet);
addRegAndItsAliases(Reg, TRI, AllDefsSet);
}
@@ -2034,25 +2122,9 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
MBB->splice(Loc, TBB, TBB->begin(), TIB);
FBB->erase(FBB->begin(), FIB);
- // Update livein's.
- bool ChangedLiveIns = false;
- for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
- unsigned Def = LocalDefs[i];
- if (ActiveDefsSet.count(Def)) {
- TBB->addLiveIn(Def);
- FBB->addLiveIn(Def);
- ChangedLiveIns = true;
- }
- }
- for (unsigned K : LocalKills) {
- TBB->removeLiveIn(K);
- FBB->removeLiveIn(K);
- ChangedLiveIns = true;
- }
-
- if (ChangedLiveIns) {
- TBB->sortUniqueLiveIns();
- FBB->sortUniqueLiveIns();
+ if (UpdateLiveIns) {
+ recomputeLiveIns(*TBB);
+ recomputeLiveIns(*FBB);
}
++NumHoist;
diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm/lib/CodeGen/BranchFolding.h
index 0f0952550137..accd0ab7317b 100644
--- a/contrib/llvm/lib/CodeGen/BranchFolding.h
+++ b/contrib/llvm/lib/CodeGen/BranchFolding.h
@@ -38,11 +38,11 @@ class TargetRegisterInfo;
explicit BranchFolder(bool defaultEnableTailMerge,
bool CommonHoist,
- MBFIWrapper &MBFI,
- const MachineBranchProbabilityInfo &MBPI,
+ MBFIWrapper &FreqInfo,
+ const MachineBranchProbabilityInfo &ProbInfo,
// Min tail length to merge. Defaults to commandline
// flag. Ignored for optsize.
- unsigned MinCommonTailLength = 0);
+ unsigned MinTailLength = 0);
  /// Perform branch folding, tail merging, and other CFG optimizations on the
/// given function. Block placement changes the layout and may create new
@@ -75,7 +75,7 @@ class TargetRegisterInfo;
std::vector<MergePotentialsElt> MergePotentials;
SmallPtrSet<const MachineBasicBlock*, 2> TriedMerging;
- DenseMap<const MachineBasicBlock *, int> FuncletMembership;
+ DenseMap<const MachineBasicBlock *, int> EHScopeMembership;
class SameTailElt {
MPIterator MPIter;
@@ -132,7 +132,7 @@ class TargetRegisterInfo;
LivePhysRegs LiveRegs;
public:
- /// \brief This class keeps track of branch frequencies of newly created
+ /// This class keeps track of branch frequencies of newly created
/// blocks and tail-merged blocks.
class MBFIWrapper {
public:
diff --git a/contrib/llvm/lib/CodeGen/BranchRelaxation.cpp b/contrib/llvm/lib/CodeGen/BranchRelaxation.cpp
index 0d87f142c7cc..c092da2b6602 100644
--- a/contrib/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/contrib/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/Pass.h"
#include "llvm/Support/Compiler.h"
@@ -95,7 +96,7 @@ class BranchRelaxation : public MachineFunctionPass {
MachineBasicBlock *splitBlockBeforeInstr(MachineInstr &MI,
MachineBasicBlock *DestBB);
- void adjustBlockOffsets(MachineBasicBlock &MBB);
+ void adjustBlockOffsets(MachineBasicBlock &Start);
bool isBlockInRange(const MachineInstr &MI, const MachineBasicBlock &BB) const;
bool fixupConditionalBranch(MachineInstr &MI);
@@ -287,10 +288,11 @@ bool BranchRelaxation::isBlockInRange(
if (TII->isBranchOffsetInRange(MI.getOpcode(), DestOffset - BrOffset))
return true;
- DEBUG(dbgs() << "Out of range branch to destination "
- << printMBBReference(DestBB) << " from "
- << printMBBReference(*MI.getParent()) << " to " << DestOffset
- << " offset " << DestOffset - BrOffset << '\t' << MI);
+ LLVM_DEBUG(dbgs() << "Out of range branch to destination "
+ << printMBBReference(DestBB) << " from "
+ << printMBBReference(*MI.getParent()) << " to "
+ << DestOffset << " offset " << DestOffset - BrOffset << '\t'
+ << MI);
return false;
}
@@ -302,8 +304,41 @@ bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) {
DebugLoc DL = MI.getDebugLoc();
MachineBasicBlock *MBB = MI.getParent();
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ MachineBasicBlock *NewBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
+ auto insertUncondBranch = [&](MachineBasicBlock *MBB,
+ MachineBasicBlock *DestBB) {
+ unsigned &BBSize = BlockInfo[MBB->getNumber()].Size;
+ int NewBrSize = 0;
+ TII->insertUnconditionalBranch(*MBB, DestBB, DL, &NewBrSize);
+ BBSize += NewBrSize;
+ };
+ auto insertBranch = [&](MachineBasicBlock *MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ SmallVectorImpl<MachineOperand>& Cond) {
+ unsigned &BBSize = BlockInfo[MBB->getNumber()].Size;
+ int NewBrSize = 0;
+ TII->insertBranch(*MBB, TBB, FBB, Cond, DL, &NewBrSize);
+ BBSize += NewBrSize;
+ };
+ auto removeBranch = [&](MachineBasicBlock *MBB) {
+ unsigned &BBSize = BlockInfo[MBB->getNumber()].Size;
+ int RemovedSize = 0;
+ TII->removeBranch(*MBB, &RemovedSize);
+ BBSize -= RemovedSize;
+ };
+
+ auto finalizeBlockChanges = [&](MachineBasicBlock *MBB,
+ MachineBasicBlock *NewBB) {
+ // Keep the block offsets up to date.
+ adjustBlockOffsets(*MBB);
+
+ // Need to fix live-in lists if we track liveness.
+ if (NewBB && TRI->trackLivenessAfterRegAlloc(*MF))
+ computeAndAddLiveIns(LiveRegs, *NewBB);
+ };
+
bool Fail = TII->analyzeBranch(*MBB, TBB, FBB, Cond);
assert(!Fail && "branches to be relaxed must be analyzable");
(void)Fail;
@@ -316,71 +351,90 @@ bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) {
// b L1
// L2:
- if (FBB && isBlockInRange(MI, *FBB)) {
- // Last MI in the BB is an unconditional branch. We can simply invert the
- // condition and swap destinations:
- // beq L1
- // b L2
- // =>
- // bne L2
- // b L1
- DEBUG(dbgs() << " Invert condition and swap "
- "its destination with " << MBB->back());
-
- TII->reverseBranchCondition(Cond);
- int OldSize = 0, NewSize = 0;
- TII->removeBranch(*MBB, &OldSize);
- TII->insertBranch(*MBB, FBB, TBB, Cond, DL, &NewSize);
-
- BlockInfo[MBB->getNumber()].Size += (NewSize - OldSize);
- return true;
- } else if (FBB) {
- // We need to split the basic block here to obtain two long-range
- // unconditional branches.
- auto &NewBB = *MF->CreateMachineBasicBlock(MBB->getBasicBlock());
- MF->insert(++MBB->getIterator(), &NewBB);
-
- // Insert an entry into BlockInfo to align it properly with the block
- // numbers.
- BlockInfo.insert(BlockInfo.begin() + NewBB.getNumber(), BasicBlockInfo());
-
- unsigned &NewBBSize = BlockInfo[NewBB.getNumber()].Size;
- int NewBrSize;
- TII->insertUnconditionalBranch(NewBB, FBB, DL, &NewBrSize);
- NewBBSize += NewBrSize;
-
- // Update the successor lists according to the transformation to follow.
- // Do it here since if there's no split, no update is needed.
- MBB->replaceSuccessor(FBB, &NewBB);
- NewBB.addSuccessor(FBB);
+ bool ReversedCond = !TII->reverseBranchCondition(Cond);
+ if (ReversedCond) {
+ if (FBB && isBlockInRange(MI, *FBB)) {
+ // Last MI in the BB is an unconditional branch. We can simply invert the
+ // condition and swap destinations:
+ // beq L1
+ // b L2
+ // =>
+ // bne L2
+ // b L1
+ LLVM_DEBUG(dbgs() << " Invert condition and swap "
+ "its destination with "
+ << MBB->back());
+
+ removeBranch(MBB);
+ insertBranch(MBB, FBB, TBB, Cond);
+ finalizeBlockChanges(MBB, nullptr);
+ return true;
+ }
+ if (FBB) {
+ // We need to split the basic block here to obtain two long-range
+ // unconditional branches.
+ NewBB = createNewBlockAfter(*MBB);
+
+ insertUncondBranch(NewBB, FBB);
+      // Update the successor lists according to the transformation to follow.
+ // Do it here since if there's no split, no update is needed.
+ MBB->replaceSuccessor(FBB, NewBB);
+ NewBB->addSuccessor(FBB);
+ }
- // Need to fix live-in lists if we track liveness.
- if (TRI->trackLivenessAfterRegAlloc(*MF))
- computeAndAddLiveIns(LiveRegs, NewBB);
+ // We now have an appropriate fall-through block in place (either naturally or
+  // just created), so we can use the inverted condition.
+ MachineBasicBlock &NextBB = *std::next(MachineFunction::iterator(MBB));
+
+ LLVM_DEBUG(dbgs() << " Insert B to " << printMBBReference(*TBB)
+ << ", invert condition and change dest. to "
+ << printMBBReference(NextBB) << '\n');
+
+ removeBranch(MBB);
+ // Insert a new conditional branch and a new unconditional branch.
+ insertBranch(MBB, &NextBB, TBB, Cond);
+
+ finalizeBlockChanges(MBB, NewBB);
+ return true;
}
+ // Branch cond can't be inverted.
+ // In this case we always add a block after the MBB.
+ LLVM_DEBUG(dbgs() << " The branch condition can't be inverted. "
+ << " Insert a new BB after " << MBB->back());
- // We now have an appropriate fall-through block in place (either naturally or
- // just created), so we can invert the condition.
- MachineBasicBlock &NextBB = *std::next(MachineFunction::iterator(MBB));
+ if (!FBB)
+ FBB = &(*std::next(MachineFunction::iterator(MBB)));
- DEBUG(dbgs() << " Insert B to " << printMBBReference(*TBB)
- << ", invert condition and change dest. to "
- << printMBBReference(NextBB) << '\n');
+ // This is the block with the conditional branch, and the distance to TBB is too long.
+ // beq L1
+ // L2:
- unsigned &MBBSize = BlockInfo[MBB->getNumber()].Size;
+ // We do the following transformation:
+ // beq NewBB
+ // b L2
+ // NewBB:
+ // b L1
+ // L2:
- // Insert a new conditional branch and a new unconditional branch.
- int RemovedSize = 0;
- TII->reverseBranchCondition(Cond);
- TII->removeBranch(*MBB, &RemovedSize);
- MBBSize -= RemovedSize;
+ NewBB = createNewBlockAfter(*MBB);
+ insertUncondBranch(NewBB, TBB);
- int AddedSize = 0;
- TII->insertBranch(*MBB, &NextBB, TBB, Cond, DL, &AddedSize);
- MBBSize += AddedSize;
+ LLVM_DEBUG(dbgs() << " Insert cond B to the new BB "
+ << printMBBReference(*NewBB)
+ << " Keep the exiting condition.\n"
+ << " Insert B to " << printMBBReference(*FBB) << ".\n"
+ << " In the new BB: Insert B to "
+ << printMBBReference(*TBB) << ".\n");
- // Finally, keep the block offsets up to date.
- adjustBlockOffsets(*MBB);
+ // Update the successor lists according to the transformation to follow.
+ MBB->replaceSuccessor(TBB, NewBB);
+ NewBB->addSuccessor(TBB);
+
+ // Replace branch in the current (MBB) block.
+ removeBranch(MBB);
+ insertBranch(MBB, NewBB, FBB, Cond);
+
+ finalizeBlockChanges(MBB, NewBB);
return true;
}
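
These rewrites are only applied when the destination of the conditional branch lies beyond the displacement its encoding can express; isBlockInRange makes that decision from the per-block offsets and sizes kept in BlockInfo. Below is a self-contained sketch of that range check, separate from this patch, using invented names and an assumed 16-bit signed displacement limit:

#include <cstdint>
#include <cstdio>
#include <vector>

// Per-block layout info: byte offset from the start of the function and block
// size, playing the role BasicBlockInfo plays in the pass (names invented).
struct BlockLayout {
  uint64_t Offset;
  uint64_t Size;
};

// Can a branch located at BrOffset reach a block starting at DestOffset,
// given a symmetric signed displacement limit of +/- MaxDisp bytes?
static bool isInRange(uint64_t BrOffset, uint64_t DestOffset, uint64_t MaxDisp) {
  int64_t Delta = (int64_t)DestOffset - (int64_t)BrOffset;
  return Delta >= -(int64_t)MaxDisp && Delta <= (int64_t)MaxDisp;
}

int main() {
  // Block 0 holds the conditional branch; block 1 is its far-away target.
  std::vector<BlockLayout> Blocks = {{0, 64}, {1u << 20, 16}};
  uint64_t BrOffset = Blocks[0].Offset + Blocks[0].Size - 4;
  uint64_t MaxCondDisp = 1u << 15; // e.g. a 16-bit signed conditional branch

  if (!isInRange(BrOffset, Blocks[1].Offset, MaxCondDisp))
    std::puts("out of range: invert the condition or split the block as above");
  return 0;
}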
@@ -490,7 +544,7 @@ bool BranchRelaxation::relaxBranchInstructions() {
bool BranchRelaxation::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
- DEBUG(dbgs() << "***** BranchRelaxation *****\n");
+ LLVM_DEBUG(dbgs() << "***** BranchRelaxation *****\n");
const TargetSubtargetInfo &ST = MF->getSubtarget();
TII = ST.getInstrInfo();
@@ -507,7 +561,7 @@ bool BranchRelaxation::runOnMachineFunction(MachineFunction &mf) {
// sizes of each block.
scanFunction();
- DEBUG(dbgs() << " Basic blocks before relaxation\n"; dumpBBs(););
+ LLVM_DEBUG(dbgs() << " Basic blocks before relaxation\n"; dumpBBs(););
bool MadeChange = false;
while (relaxBranchInstructions())
@@ -516,7 +570,7 @@ bool BranchRelaxation::runOnMachineFunction(MachineFunction &mf) {
// After a while, this might be made debug-only, but it is not expensive.
verify();
- DEBUG(dbgs() << " Basic blocks after relaxation\n\n"; dumpBBs());
+ LLVM_DEBUG(dbgs() << " Basic blocks after relaxation\n\n"; dumpBBs());
BlockInfo.clear();
diff --git a/contrib/llvm/lib/CodeGen/BreakFalseDeps.cpp b/contrib/llvm/lib/CodeGen/BreakFalseDeps.cpp
new file mode 100644
index 000000000000..7f098cb71657
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/BreakFalseDeps.cpp
@@ -0,0 +1,271 @@
+//==- llvm/CodeGen/BreakFalseDeps.cpp - Break False Dependency Fix -*- C++ -*==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Break False Dependency pass.
+///
+/// Some instructions have false dependencies which cause unnecessary stalls.
+/// For example, instructions that write only part of a register implicitly
+/// need to read the other parts of that register. This may cause unwanted
+/// stalls, preventing otherwise unrelated instructions from executing in
+/// parallel in an out-of-order CPU.
+/// This pass is aimed at identifying and avoiding these dependencies when
+/// possible.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/ReachingDefAnalysis.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+
+
+using namespace llvm;
+
+namespace llvm {
+
+class BreakFalseDeps : public MachineFunctionPass {
+private:
+ MachineFunction *MF;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ RegisterClassInfo RegClassInfo;
+
+ /// List of undefined register reads in this block in forward order.
+ std::vector<std::pair<MachineInstr *, unsigned>> UndefReads;
+
+ /// Storage for register unit liveness.
+ LivePhysRegs LiveRegSet;
+
+ ReachingDefAnalysis *RDA;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ BreakFalseDeps() : MachineFunctionPass(ID) {
+ initializeBreakFalseDepsPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ AU.addRequired<ReachingDefAnalysis>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+private:
+ /// Process the given basic block.
+ void processBasicBlock(MachineBasicBlock *MBB);
+
+ /// Update def-ages for registers defined by MI.
+ /// Also break dependencies on partial defs and undef uses.
+ void processDefs(MachineInstr *MI);
+
+ /// Helps avoid false dependencies on undef registers by updating the
+ /// machine instruction's undef operand to use a register that the instruction
+ /// is truly dependent on, or to use a register with clearance higher than Pref.
+ /// Returns true if it was able to find a true dependency, thus not requiring
+ /// a dependency breaking instruction regardless of clearance.
+ bool pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
+ unsigned Pref);
+
+ /// Return true if it makes sense to break dependence on a partial
+ /// def or undef use.
+ bool shouldBreakDependence(MachineInstr *, unsigned OpIdx, unsigned Pref);
+
+ /// Break false dependencies on undefined register reads.
+ /// Walk the block backward computing precise liveness. This is expensive, so
+ /// we only do it on demand. Note that the occurrence of undefined register
+ /// reads that should be broken is very rare, but when they occur we may have
+ /// many in a single block.
+ void processUndefReads(MachineBasicBlock *);
+};
+
+} // namespace llvm
+
+#define DEBUG_TYPE "break-false-deps"
+
+char BreakFalseDeps::ID = 0;
+INITIALIZE_PASS_BEGIN(BreakFalseDeps, DEBUG_TYPE, "BreakFalseDeps", false, false)
+INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis)
+INITIALIZE_PASS_END(BreakFalseDeps, DEBUG_TYPE, "BreakFalseDeps", false, false)
+
+FunctionPass *llvm::createBreakFalseDeps() { return new BreakFalseDeps(); }
+
+bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
+ unsigned Pref) {
+ MachineOperand &MO = MI->getOperand(OpIdx);
+ assert(MO.isUndef() && "Expected undef machine operand");
+
+ unsigned OriginalReg = MO.getReg();
+
+ // Update only undef operands that have reg units that are mapped to one root.
+ for (MCRegUnitIterator Unit(OriginalReg, TRI); Unit.isValid(); ++Unit) {
+ unsigned NumRoots = 0;
+ for (MCRegUnitRootIterator Root(*Unit, TRI); Root.isValid(); ++Root) {
+ NumRoots++;
+ if (NumRoots > 1)
+ return false;
+ }
+ }
+
+ // Get the undef operand's register class
+ const TargetRegisterClass *OpRC =
+ TII->getRegClass(MI->getDesc(), OpIdx, TRI, *MF);
+
+ // If the instruction has a true dependency, we can hide the false dependency
+ // behind it.
+ for (MachineOperand &CurrMO : MI->operands()) {
+ if (!CurrMO.isReg() || CurrMO.isDef() || CurrMO.isUndef() ||
+ !OpRC->contains(CurrMO.getReg()))
+ continue;
+ // We found a true dependency - replace the undef register with the true
+ // dependency.
+ MO.setReg(CurrMO.getReg());
+ return true;
+ }
+
+ // Go over all registers in the register class and find the register with
+ // max clearance or clearance higher than Pref.
+ unsigned MaxClearance = 0;
+ unsigned MaxClearanceReg = OriginalReg;
+ ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(OpRC);
+ for (MCPhysReg Reg : Order) {
+ unsigned Clearance = RDA->getClearance(MI, Reg);
+ if (Clearance <= MaxClearance)
+ continue;
+ MaxClearance = Clearance;
+ MaxClearanceReg = Reg;
+
+ if (MaxClearance > Pref)
+ break;
+ }
+
+ // Update the operand if we found a register with better clearance.
+ if (MaxClearanceReg != OriginalReg)
+ MO.setReg(MaxClearanceReg);
+
+ return false;
+}
+
+bool BreakFalseDeps::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
+ unsigned Pref) {
+ unsigned reg = MI->getOperand(OpIdx).getReg();
+ unsigned Clearance = RDA->getClearance(MI, reg);
+ LLVM_DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref);
+
+ if (Pref > Clearance) {
+ LLVM_DEBUG(dbgs() << ": Break dependency.\n");
+ return true;
+ }
+ LLVM_DEBUG(dbgs() << ": OK .\n");
+ return false;
+}
+
+void BreakFalseDeps::processDefs(MachineInstr *MI) {
+ assert(!MI->isDebugInstr() && "Won't process debug values");
+
+ // Break dependence on undef uses. Do this before updating LiveRegs below.
+ unsigned OpNum;
+ unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI);
+ if (Pref) {
+ bool HadTrueDependency = pickBestRegisterForUndef(MI, OpNum, Pref);
+ // We don't need to bother trying to break a dependency if this
+ // instruction has a true dependency on that register through another
+ // operand - we'll have to wait for it to be available regardless.
+ if (!HadTrueDependency && shouldBreakDependence(MI, OpNum, Pref))
+ UndefReads.push_back(std::make_pair(MI, OpNum));
+ }
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ for (unsigned i = 0,
+ e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();
+ i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ if (MO.isUse())
+ continue;
+ // Check clearance before partial register updates.
+ unsigned Pref = TII->getPartialRegUpdateClearance(*MI, i, TRI);
+ if (Pref && shouldBreakDependence(MI, i, Pref))
+ TII->breakPartialRegDependency(*MI, i, TRI);
+ }
+}
+
+void BreakFalseDeps::processUndefReads(MachineBasicBlock *MBB) {
+ if (UndefReads.empty())
+ return;
+
+ // Collect this block's live out register units.
+ LiveRegSet.init(*TRI);
+ // We do not need to care about pristine registers as they are just preserved
+ // but not actually used in the function.
+ LiveRegSet.addLiveOutsNoPristines(*MBB);
+
+ MachineInstr *UndefMI = UndefReads.back().first;
+ unsigned OpIdx = UndefReads.back().second;
+
+ for (MachineInstr &I : make_range(MBB->rbegin(), MBB->rend())) {
+ // Update liveness, including the current instruction's defs.
+ LiveRegSet.stepBackward(I);
+
+ if (UndefMI == &I) {
+ if (!LiveRegSet.contains(UndefMI->getOperand(OpIdx).getReg()))
+ TII->breakPartialRegDependency(*UndefMI, OpIdx, TRI);
+
+ UndefReads.pop_back();
+ if (UndefReads.empty())
+ return;
+
+ UndefMI = UndefReads.back().first;
+ OpIdx = UndefReads.back().second;
+ }
+ }
+}
+
+void BreakFalseDeps::processBasicBlock(MachineBasicBlock *MBB) {
+ UndefReads.clear();
+ // If this block is not done, it makes little sense to make any decisions
+ // based on clearance information. We need to make a second pass anyway,
+ // and by then we'll have better information, so we can avoid doing the work
+ // to try and break dependencies now.
+ for (MachineInstr &MI : *MBB) {
+ if (!MI.isDebugInstr())
+ processDefs(&MI);
+ }
+ processUndefReads(MBB);
+}
+
+bool BreakFalseDeps::runOnMachineFunction(MachineFunction &mf) {
+ if (skipFunction(mf.getFunction()))
+ return false;
+ MF = &mf;
+ TII = MF->getSubtarget().getInstrInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
+ RDA = &getAnalysis<ReachingDefAnalysis>();
+
+ RegClassInfo.runOnMachineFunction(mf);
+
+ LLVM_DEBUG(dbgs() << "********** BREAK FALSE DEPENDENCIES **********\n");
+
+ // Traverse the basic blocks.
+ for (MachineBasicBlock &MBB : mf) {
+ processBasicBlock(&MBB);
+ }
+
+ return false;
+}
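
The pass's decision boils down to the clearance heuristic used by shouldBreakDependence above: if the register about to be partially written was defined too recently, it is worth inserting a dependency-breaking instruction (for example, zeroing the register). The following is a standalone sketch of that heuristic, not part of this patch, with invented register names and a flat instruction stream in place of MachineInstrs:

#include <cstdio>
#include <string>
#include <unordered_map>
#include <vector>

// Clearance = number of instructions since Reg was last (partially) defined.
// A large clearance means the stale value has long since retired, so a
// partial write is unlikely to stall waiting for it.
static unsigned clearance(const std::unordered_map<std::string, unsigned> &LastDef,
                          unsigned Pos, const std::string &Reg) {
  auto It = LastDef.find(Reg);
  return It == LastDef.end() ? ~0u : Pos - It->second;
}

int main() {
  const unsigned Pref = 16; // desired minimum clearance before a partial write
  // Register defined by each instruction in a straight-line block.
  std::vector<std::string> Defs = {"xmm0", "xmm1", "xmm0"};

  std::unordered_map<std::string, unsigned> LastDef;
  for (unsigned Pos = 0; Pos != Defs.size(); ++Pos) {
    unsigned C = clearance(LastDef, Pos, Defs[Pos]);
    if (C < Pref) // mirrors shouldBreakDependence: Pref > Clearance
      std::printf("inst %u: clearance %u < %u -> break dependency on %s\n",
                  Pos, C, Pref, Defs[Pos].c_str());
    LastDef[Defs[Pos]] = Pos;
  }
  return 0;
}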
diff --git a/contrib/llvm/lib/CodeGen/CFIInstrInserter.cpp b/contrib/llvm/lib/CodeGen/CFIInstrInserter.cpp
new file mode 100644
index 000000000000..00ebf63fc174
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CFIInstrInserter.cpp
@@ -0,0 +1,326 @@
+//===------ CFIInstrInserter.cpp - Insert additional CFI instructions -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This pass verifies incoming and outgoing CFA information of basic
+/// blocks. CFA information is information about offset and register set by CFI
+/// directives, valid at the start and end of a basic block. This pass checks
+/// that outgoing information of predecessors matches incoming information of
+/// their successors. Then it checks if blocks have correct CFA calculation rule
+/// set and inserts additional CFI instruction at their beginnings if they
+/// don't. CFI instructions are inserted if basic blocks have incorrect offset
+/// or register set by previous blocks, as a result of a non-linear layout of
+/// blocks in a function.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+static cl::opt<bool> VerifyCFI("verify-cfiinstrs",
+ cl::desc("Verify Call Frame Information instructions"),
+ cl::init(false),
+ cl::Hidden);
+
+namespace {
+class CFIInstrInserter : public MachineFunctionPass {
+ public:
+ static char ID;
+
+ CFIInstrInserter() : MachineFunctionPass(ID) {
+ initializeCFIInstrInserterPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ if (!MF.getMMI().hasDebugInfo() &&
+ !MF.getFunction().needsUnwindTableEntry())
+ return false;
+
+ MBBVector.resize(MF.getNumBlockIDs());
+ calculateCFAInfo(MF);
+
+ if (VerifyCFI) {
+ if (unsigned ErrorNum = verify(MF))
+ report_fatal_error("Found " + Twine(ErrorNum) +
+ " in/out CFI information errors.");
+ }
+ bool insertedCFI = insertCFIInstrs(MF);
+ MBBVector.clear();
+ return insertedCFI;
+ }
+
+ private:
+ struct MBBCFAInfo {
+ MachineBasicBlock *MBB;
+ /// Value of cfa offset valid at basic block entry.
+ int IncomingCFAOffset = -1;
+ /// Value of cfa offset valid at basic block exit.
+ int OutgoingCFAOffset = -1;
+ /// Value of cfa register valid at basic block entry.
+ unsigned IncomingCFARegister = 0;
+ /// Value of cfa register valid at basic block exit.
+ unsigned OutgoingCFARegister = 0;
+ /// If in/out cfa offset and register values for this block have already
+ /// been set or not.
+ bool Processed = false;
+ };
+
+ /// Contains cfa offset and register values valid at entry and exit of basic
+ /// blocks.
+ std::vector<MBBCFAInfo> MBBVector;
+
+ /// Calculate cfa offset and register values valid at entry and exit for all
+ /// basic blocks in a function.
+ void calculateCFAInfo(MachineFunction &MF);
+ /// Calculate cfa offset and register values valid at basic block exit by
+ /// checking the block for CFI instructions. Block's incoming CFA info remains
+ /// the same.
+ void calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo);
+ /// Update in/out cfa offset and register values for successors of the basic
+ /// block.
+ void updateSuccCFAInfo(MBBCFAInfo &MBBInfo);
+
+ /// Check if incoming CFA information of a basic block matches outgoing CFA
+ /// information of the previous block. If it doesn't, insert CFI instruction
+ /// at the beginning of the block that corrects the CFA calculation rule for
+ /// that block.
+ bool insertCFIInstrs(MachineFunction &MF);
+ /// Return the cfa offset value that should be set at the beginning of a MBB
+ /// if needed. The negated value is needed when creating CFI instructions that
+ /// set absolute offset.
+ int getCorrectCFAOffset(MachineBasicBlock *MBB) {
+ return -MBBVector[MBB->getNumber()].IncomingCFAOffset;
+ }
+
+ void report(const MBBCFAInfo &Pred, const MBBCFAInfo &Succ);
+ /// Go through each MBB in a function and check that outgoing offset and
+ /// register of its predecessors match incoming offset and register of that
+ /// MBB, as well as that incoming offset and register of its successors match
+ /// outgoing offset and register of the MBB.
+ unsigned verify(MachineFunction &MF);
+};
+} // namespace
+
+char CFIInstrInserter::ID = 0;
+INITIALIZE_PASS(CFIInstrInserter, "cfi-instr-inserter",
+ "Check CFA info and insert CFI instructions if needed", false,
+ false)
+FunctionPass *llvm::createCFIInstrInserter() { return new CFIInstrInserter(); }
+
+void CFIInstrInserter::calculateCFAInfo(MachineFunction &MF) {
+ // Initial CFA offset value i.e. the one valid at the beginning of the
+ // function.
+ int InitialOffset =
+ MF.getSubtarget().getFrameLowering()->getInitialCFAOffset(MF);
+ // Initial CFA register value i.e. the one valid at the beginning of the
+ // function.
+ unsigned InitialRegister =
+ MF.getSubtarget().getFrameLowering()->getInitialCFARegister(MF);
+
+ // Initialize MBBMap.
+ for (MachineBasicBlock &MBB : MF) {
+ MBBCFAInfo MBBInfo;
+ MBBInfo.MBB = &MBB;
+ MBBInfo.IncomingCFAOffset = InitialOffset;
+ MBBInfo.OutgoingCFAOffset = InitialOffset;
+ MBBInfo.IncomingCFARegister = InitialRegister;
+ MBBInfo.OutgoingCFARegister = InitialRegister;
+ MBBVector[MBB.getNumber()] = MBBInfo;
+ }
+
+ // Set in/out cfa info for all blocks in the function. This traversal is based
+ // on the assumption that the first block in the function is the entry block
+ // i.e. that it has initial cfa offset and register values as incoming CFA
+ // information.
+ for (MachineBasicBlock &MBB : MF) {
+ if (MBBVector[MBB.getNumber()].Processed) continue;
+ updateSuccCFAInfo(MBBVector[MBB.getNumber()]);
+ }
+}
+
+void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) {
+ // Outgoing cfa offset set by the block.
+ int SetOffset = MBBInfo.IncomingCFAOffset;
+ // Outgoing cfa register set by the block.
+ unsigned SetRegister = MBBInfo.IncomingCFARegister;
+ const std::vector<MCCFIInstruction> &Instrs =
+ MBBInfo.MBB->getParent()->getFrameInstructions();
+
+ // Determine cfa offset and register set by the block.
+ for (MachineInstr &MI : *MBBInfo.MBB) {
+ if (MI.isCFIInstruction()) {
+ unsigned CFIIndex = MI.getOperand(0).getCFIIndex();
+ const MCCFIInstruction &CFI = Instrs[CFIIndex];
+ switch (CFI.getOperation()) {
+ case MCCFIInstruction::OpDefCfaRegister:
+ SetRegister = CFI.getRegister();
+ break;
+ case MCCFIInstruction::OpDefCfaOffset:
+ SetOffset = CFI.getOffset();
+ break;
+ case MCCFIInstruction::OpAdjustCfaOffset:
+ SetOffset += CFI.getOffset();
+ break;
+ case MCCFIInstruction::OpDefCfa:
+ SetRegister = CFI.getRegister();
+ SetOffset = CFI.getOffset();
+ break;
+ case MCCFIInstruction::OpRememberState:
+ // TODO: Add support for handling cfi_remember_state.
+#ifndef NDEBUG
+ report_fatal_error(
+ "Support for cfi_remember_state not implemented! Value of CFA "
+ "may be incorrect!\n");
+#endif
+ break;
+ case MCCFIInstruction::OpRestoreState:
+ // TODO: Add support for handling cfi_restore_state.
+#ifndef NDEBUG
+ report_fatal_error(
+ "Support for cfi_restore_state not implemented! Value of CFA may "
+ "be incorrect!\n");
+#endif
+ break;
+ // Other CFI directives do not affect CFA value.
+ case MCCFIInstruction::OpSameValue:
+ case MCCFIInstruction::OpOffset:
+ case MCCFIInstruction::OpRelOffset:
+ case MCCFIInstruction::OpEscape:
+ case MCCFIInstruction::OpRestore:
+ case MCCFIInstruction::OpUndefined:
+ case MCCFIInstruction::OpRegister:
+ case MCCFIInstruction::OpWindowSave:
+ case MCCFIInstruction::OpGnuArgsSize:
+ break;
+ }
+ }
+ }
+
+ MBBInfo.Processed = true;
+
+ // Update outgoing CFA info.
+ MBBInfo.OutgoingCFAOffset = SetOffset;
+ MBBInfo.OutgoingCFARegister = SetRegister;
+}
+
+void CFIInstrInserter::updateSuccCFAInfo(MBBCFAInfo &MBBInfo) {
+ SmallVector<MachineBasicBlock *, 4> Stack;
+ Stack.push_back(MBBInfo.MBB);
+
+ do {
+ MachineBasicBlock *Current = Stack.pop_back_val();
+ MBBCFAInfo &CurrentInfo = MBBVector[Current->getNumber()];
+ if (CurrentInfo.Processed)
+ continue;
+
+ calculateOutgoingCFAInfo(CurrentInfo);
+ for (auto *Succ : CurrentInfo.MBB->successors()) {
+ MBBCFAInfo &SuccInfo = MBBVector[Succ->getNumber()];
+ if (!SuccInfo.Processed) {
+ SuccInfo.IncomingCFAOffset = CurrentInfo.OutgoingCFAOffset;
+ SuccInfo.IncomingCFARegister = CurrentInfo.OutgoingCFARegister;
+ Stack.push_back(Succ);
+ }
+ }
+ } while (!Stack.empty());
+}
+
+bool CFIInstrInserter::insertCFIInstrs(MachineFunction &MF) {
+ const MBBCFAInfo *PrevMBBInfo = &MBBVector[MF.front().getNumber()];
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ bool InsertedCFIInstr = false;
+
+ for (MachineBasicBlock &MBB : MF) {
+ // Skip the first MBB in a function
+ if (MBB.getNumber() == MF.front().getNumber()) continue;
+
+ const MBBCFAInfo &MBBInfo = MBBVector[MBB.getNumber()];
+ auto MBBI = MBBInfo.MBB->begin();
+ DebugLoc DL = MBBInfo.MBB->findDebugLoc(MBBI);
+
+ if (PrevMBBInfo->OutgoingCFAOffset != MBBInfo.IncomingCFAOffset) {
+ // If both outgoing offset and register of a previous block don't match
+ // incoming offset and register of this block, add a def_cfa instruction
+ // with the correct offset and register for this block.
+ if (PrevMBBInfo->OutgoingCFARegister != MBBInfo.IncomingCFARegister) {
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
+ nullptr, MBBInfo.IncomingCFARegister, getCorrectCFAOffset(&MBB)));
+ BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ // If outgoing offset of a previous block doesn't match incoming offset
+ // of this block, add a def_cfa_offset instruction with the correct
+ // offset for this block.
+ } else {
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(
+ nullptr, getCorrectCFAOffset(&MBB)));
+ BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
+ InsertedCFIInstr = true;
+ // If outgoing register of a previous block doesn't match incoming
+ // register of this block, add a def_cfa_register instruction with the
+ // correct register for this block.
+ } else if (PrevMBBInfo->OutgoingCFARegister !=
+ MBBInfo.IncomingCFARegister) {
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
+ nullptr, MBBInfo.IncomingCFARegister));
+ BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ InsertedCFIInstr = true;
+ }
+ PrevMBBInfo = &MBBInfo;
+ }
+ return InsertedCFIInstr;
+}
+
+void CFIInstrInserter::report(const MBBCFAInfo &Pred, const MBBCFAInfo &Succ) {
+ errs() << "*** Inconsistent CFA register and/or offset between pred and succ "
+ "***\n";
+ errs() << "Pred: " << Pred.MBB->getName() << " #" << Pred.MBB->getNumber()
+ << " in " << Pred.MBB->getParent()->getName()
+ << " outgoing CFA Reg:" << Pred.OutgoingCFARegister << "\n";
+ errs() << "Pred: " << Pred.MBB->getName() << " #" << Pred.MBB->getNumber()
+ << " in " << Pred.MBB->getParent()->getName()
+ << " outgoing CFA Offset:" << Pred.OutgoingCFAOffset << "\n";
+ errs() << "Succ: " << Succ.MBB->getName() << " #" << Succ.MBB->getNumber()
+ << " incoming CFA Reg:" << Succ.IncomingCFARegister << "\n";
+ errs() << "Succ: " << Succ.MBB->getName() << " #" << Succ.MBB->getNumber()
+ << " incoming CFA Offset:" << Succ.IncomingCFAOffset << "\n";
+}
+
+unsigned CFIInstrInserter::verify(MachineFunction &MF) {
+ unsigned ErrorNum = 0;
+ for (auto *CurrMBB : depth_first(&MF)) {
+ const MBBCFAInfo &CurrMBBInfo = MBBVector[CurrMBB->getNumber()];
+ for (MachineBasicBlock *Succ : CurrMBB->successors()) {
+ const MBBCFAInfo &SuccMBBInfo = MBBVector[Succ->getNumber()];
+ // Check that incoming offset and register values of successors match the
+ // outgoing offset and register values of CurrMBB
+ if (SuccMBBInfo.IncomingCFAOffset != CurrMBBInfo.OutgoingCFAOffset ||
+ SuccMBBInfo.IncomingCFARegister != CurrMBBInfo.OutgoingCFARegister) {
+ report(CurrMBBInfo, SuccMBBInfo);
+ ErrorNum++;
+ }
+ }
+ }
+ return ErrorNum;
+}
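
A compact way to see what this pass computes and checks: propagate an incoming/outgoing CFA value along the CFG, first writer wins, then verify that every edge agrees. The sketch below is standalone rather than part of this patch, tracks only the offset (the real pass also tracks the CFA register), and uses illustrative names throughout:

#include <cstdio>
#include <vector>

// Simplified per-block CFA state: only the offset is modeled here.
struct BlockCFA {
  int Incoming = 0;
  int Outgoing = 0;
  int Delta = 0;            // net effect of the block's CFI directives
  bool Processed = false;
  std::vector<int> Succs;
};

int main() {
  // A diamond CFG: block 1 moves the CFA by 16 bytes, block 2 leaves it alone,
  // so block 3 sees conflicting information depending on the incoming edge.
  std::vector<BlockCFA> B(4);
  B[0].Delta = 0;  B[0].Succs = {1, 2};
  B[1].Delta = 16; B[1].Succs = {3};
  B[2].Delta = 0;  B[2].Succs = {3};

  // DFS propagation, first-writer-wins on the incoming value, mirroring
  // updateSuccCFAInfo above.
  std::vector<int> Stack = {0};
  while (!Stack.empty()) {
    int Cur = Stack.back(); Stack.pop_back();
    if (B[Cur].Processed) continue;
    B[Cur].Outgoing = B[Cur].Incoming + B[Cur].Delta;
    B[Cur].Processed = true;
    for (int S : B[Cur].Succs)
      if (!B[S].Processed) {
        B[S].Incoming = B[Cur].Outgoing;
        Stack.push_back(S);
      }
  }

  // Verification mirrors verify(): every edge must agree; a mismatch is where
  // the real pass would insert a def_cfa_offset at the successor's start.
  for (unsigned I = 0; I != B.size(); ++I)
    for (int S : B[I].Succs)
      if (B[S].Incoming != B[I].Outgoing)
        std::printf("edge %u -> %d disagrees: %d vs %d\n", I, S,
                    B[S].Incoming, B[I].Outgoing);
  return 0;
}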
diff --git a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
index b8920a601938..57541182cab2 100644
--- a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -35,8 +35,8 @@ void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS,
const MachineLoopInfo &MLI,
const MachineBlockFrequencyInfo &MBFI,
VirtRegAuxInfo::NormalizingFn norm) {
- DEBUG(dbgs() << "********** Compute Spill Weights **********\n"
- << "********** Function: " << MF.getName() << '\n');
+ LLVM_DEBUG(dbgs() << "********** Compute Spill Weights **********\n"
+ << "********** Function: " << MF.getName() << '\n');
MachineRegisterInfo &MRI = MF.getRegInfo();
VirtRegAuxInfo VRAI(MF, LIS, VRM, MLI, MBFI, norm);
@@ -236,7 +236,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start,
continue;
numInstr++;
- if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugValue())
+ if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugInstr())
continue;
if (!visited.insert(mi).second)
continue;
diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp
index 879cd2859ee9..2f845354c570 100644
--- a/contrib/llvm/lib/CodeGen/CodeGen.cpp
+++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp
@@ -23,11 +23,14 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeAtomicExpandPass(Registry);
initializeBranchFolderPassPass(Registry);
initializeBranchRelaxationPass(Registry);
+ initializeCFIInstrInserterPass(Registry);
initializeCodeGenPreparePass(Registry);
initializeDeadMachineInstructionElimPass(Registry);
initializeDetectDeadLanesPass(Registry);
initializeDwarfEHPreparePass(Registry);
initializeEarlyIfConverterPass(Registry);
+ initializeEarlyMachineLICMPass(Registry);
+ initializeEarlyTailDuplicatePass(Registry);
initializeExpandISelPseudosPass(Registry);
initializeExpandMemCmpPassPass(Registry);
initializeExpandPostRAPass(Registry);
@@ -48,6 +51,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeLiveVariablesPass(Registry);
initializeLocalStackSlotPassPass(Registry);
initializeLowerIntrinsicsPass(Registry);
+ initializeMIRCanonicalizerPass(Registry);
initializeMachineBlockFrequencyInfoPass(Registry);
initializeMachineBlockPlacementPass(Registry);
initializeMachineBlockPlacementStatsPass(Registry);
@@ -74,12 +78,15 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializePeepholeOptimizerPass(Registry);
initializePostMachineSchedulerPass(Registry);
initializePostRAHazardRecognizerPass(Registry);
+ initializePostRAMachineSinkingPass(Registry);
initializePostRASchedulerPass(Registry);
initializePreISelIntrinsicLoweringLegacyPassPass(Registry);
initializeProcessImplicitDefsPass(Registry);
initializeRABasicPass(Registry);
- initializeRegAllocFastPass(Registry);
initializeRAGreedyPass(Registry);
+ initializeRegAllocFastPass(Registry);
+ initializeRegUsageInfoCollectorPass(Registry);
+ initializeRegUsageInfoPropagationPass(Registry);
initializeRegisterCoalescerPass(Registry);
initializeRenameIndependentSubregsPass(Registry);
initializeSafeStackLegacyPassPass(Registry);
@@ -90,7 +97,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeStackMapLivenessPass(Registry);
initializeStackProtectorPass(Registry);
initializeStackSlotColoringPass(Registry);
- initializeTailDuplicatePassPass(Registry);
+ initializeTailDuplicatePass(Registry);
initializeTargetPassConfigPass(Registry);
initializeTwoAddressInstructionPassPass(Registry);
initializeUnpackMachineBundlesPass(Registry);
@@ -98,9 +105,9 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeUnreachableMachineBlockElimPass(Registry);
initializeVirtRegMapPass(Registry);
initializeVirtRegRewriterPass(Registry);
+ initializeWasmEHPreparePass(Registry);
initializeWinEHPreparePass(Registry);
initializeXRayInstrumentationPass(Registry);
- initializeMIRCanonicalizerPass(Registry);
}
void LLVMInitializeCodeGen(LLVMPassRegistryRef R) {
diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 26ca8d4ee88c..c41beb094604 100644
--- a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -30,15 +30,16 @@
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -79,13 +80,13 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include <algorithm>
#include <cassert>
@@ -196,7 +197,7 @@ AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false),
cl::desc("Allow creation of Phis in Address sinking."));
static cl::opt<bool>
-AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(false),
+AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true),
cl::desc("Allow creation of selects in Address sinking."));
static cl::opt<bool> AddrSinkCombineBaseReg(
@@ -215,6 +216,11 @@ static cl::opt<bool> AddrSinkCombineScaledReg(
"addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true),
cl::desc("Allow combining of ScaledReg field in Address sinking."));
+static cl::opt<bool>
+ EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden,
+ cl::init(true),
+ cl::desc("Enable splitting large offset of GEP."));
+
namespace {
using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
@@ -260,6 +266,20 @@ class TypePromotionTransaction;
/// Keep track of sext chains based on their initial value.
DenseMap<Value *, Instruction *> SeenChainsForSExt;
+ /// Keep track of GEPs accessing the same data structures such as structs or
+ /// arrays that are candidates to be split later because of their large
+ /// size.
+ DenseMap<
+ AssertingVH<Value>,
+ SmallVector<std::pair<AssertingVH<GetElementPtrInst>, int64_t>, 32>>
+ LargeOffsetGEPMap;
+
+ /// Keep track of new GEP bases after splitting GEPs that have large offsets.
+ SmallSet<AssertingVH<Value>, 2> NewGEPBases;
+
+ /// Map serial numbers to Large offset GEPs.
+ DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID;
+
/// Keep track of SExt promoted.
ValueToSExts ValToSExtendedUses;
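
The intent behind these new members is easier to see on a concrete case: when several GEPs share a base but carry constant offsets too large for an addressing-mode immediate, the large part can be materialized once in a new base and each GEP rewritten with a small residual offset. The following is a standalone sketch of that rewrite, not part of this patch, with an assumed immediate limit and invented value names:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

int main() {
  // GEPs off one base whose constant offsets are too large to fold into an
  // addressing-mode immediate (say, anything above 4095).
  std::vector<std::pair<std::string, int64_t>> GEPs = {
      {"%g1", 40000}, {"%g2", 40008}, {"%g3", 40040}};

  std::sort(GEPs.begin(), GEPs.end(),
            [](const std::pair<std::string, int64_t> &A,
               const std::pair<std::string, int64_t> &B) {
              return A.second < B.second;
            });

  // Materialize the smallest large offset once in a new base ...
  int64_t NewBaseOff = GEPs.front().second;
  std::printf("%%newbase = getelementptr i8, i8* %%base, i64 %lld\n",
              (long long)NewBaseOff);

  // ... and rewrite every GEP relative to it; the residual offsets are small
  // enough to fold into the memory instructions that use them.
  for (const auto &G : GEPs)
    std::printf("%s = getelementptr i8, i8* %%newbase, i64 %lld\n",
                G.first.c_str(), (long long)(G.second - NewBaseOff));
  return 0;
}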
@@ -301,16 +321,16 @@ class TypePromotionTransaction;
bool isPreheader);
bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);
bool optimizeInst(Instruction *I, bool &ModifiedDT);
- bool optimizeMemoryInst(Instruction *I, Value *Addr,
- Type *AccessTy, unsigned AS);
+ bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
+ Type *AccessTy, unsigned AddrSpace);
bool optimizeInlineAsmInst(CallInst *CS);
bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);
bool optimizeExt(Instruction *&I);
bool optimizeExtUses(Instruction *I);
- bool optimizeLoadExt(LoadInst *I);
+ bool optimizeLoadExt(LoadInst *Load);
bool optimizeSelectInst(SelectInst *SI);
- bool optimizeShuffleVectorInst(ShuffleVectorInst *SI);
- bool optimizeSwitchInst(SwitchInst *CI);
+ bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
+ bool optimizeSwitchInst(SwitchInst *SI);
bool optimizeExtractElementInst(Instruction *Inst);
bool dupRetToEnableTailCallOpts(BasicBlock *BB);
bool placeDbgValues(Function &F);
@@ -321,6 +341,7 @@ class TypePromotionTransaction;
SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
unsigned CreatedInstsCost = 0);
bool mergeSExts(Function &F);
+ bool splitLargeGEPOffsets();
bool performAddressTypePromotion(
Instruction *&Inst,
bool AllowPromotionWithoutCommonHeader,
@@ -414,6 +435,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
SeenChainsForSExt.clear();
ValToSExtendedUses.clear();
RemovedInsts.clear();
+ LargeOffsetGEPMap.clear();
+ LargeOffsetGEPID.clear();
for (Function::iterator I = F.begin(); I != F.end(); ) {
BasicBlock *BB = &*I++;
bool ModifiedDTOnIteration = false;
@@ -425,6 +448,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
}
if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
MadeChange |= mergeSExts(F);
+ if (!LargeOffsetGEPMap.empty())
+ MadeChange |= splitLargeGEPOffsets();
// Really free removed instructions during promotion.
for (Instruction *I : RemovedInsts)
@@ -437,7 +462,10 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
if (!DisableBranchOpts) {
MadeChange = false;
- SmallPtrSet<BasicBlock*, 8> WorkList;
+ // Use a set vector to get deterministic iteration order. The order the
+ // blocks are removed may affect whether or not PHI nodes in successors
+ // are removed.
+ SmallSetVector<BasicBlock*, 8> WorkList;
for (BasicBlock &BB : F) {
SmallVector<BasicBlock *, 2> Successors(succ_begin(&BB), succ_end(&BB));
MadeChange |= ConstantFoldTerminator(&BB, true);
@@ -452,8 +480,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// Delete the dead blocks and any of their dead successors.
MadeChange |= !WorkList.empty();
while (!WorkList.empty()) {
- BasicBlock *BB = *WorkList.begin();
- WorkList.erase(BB);
+ BasicBlock *BB = WorkList.pop_back_val();
SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
DeleteDeadBlock(BB);
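
The switch to SmallSetVector matters because draining an address-keyed set yields a run-to-run nondeterministic order, while a set vector remembers insertion order. Here is a standalone sketch of the idea behind that container choice, using an invented TinySetVector in place of LLVM's SmallSetVector:

#include <cstdio>
#include <string>
#include <unordered_set>
#include <vector>

// A minimal "set vector": deduplicates like a set but remembers insertion
// order, so draining it is deterministic across runs.
class TinySetVector {
  std::vector<std::string> Order;
  std::unordered_set<std::string> Seen;

public:
  bool insert(const std::string &V) {
    if (!Seen.insert(V).second)
      return false;
    Order.push_back(V);
    return true;
  }
  bool empty() const { return Order.empty(); }
  std::string pop_back_val() {
    std::string V = Order.back();
    Order.pop_back();
    Seen.erase(V);
    return V;
  }
};

int main() {
  TinySetVector WorkList;
  for (const char *BB : {"bb3", "bb1", "bb3", "bb7"}) // duplicate bb3 ignored
    WorkList.insert(BB);
  while (!WorkList.empty())
    std::printf("delete %s\n", WorkList.pop_back_val().c_str()); // bb7, bb1, bb3
  return 0;
}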
@@ -491,8 +518,16 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
bool CodeGenPrepare::eliminateFallThrough(Function &F) {
bool Changed = false;
// Scan all of the blocks in the function, except for the entry block.
- for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
- BasicBlock *BB = &*I++;
+ // Use a temporary array to avoid iterator being invalidated when
+ // deleting blocks.
+ SmallVector<WeakTrackingVH, 16> Blocks;
+ for (auto &Block : llvm::make_range(std::next(F.begin()), F.end()))
+ Blocks.push_back(&Block);
+
+ for (auto &Block : Blocks) {
+ auto *BB = cast_or_null<BasicBlock>(Block);
+ if (!BB)
+ continue;
// If the destination block has a single pred, then this is a trivial
// edge, just collapse it.
BasicBlock *SinglePred = BB->getSinglePredecessor();
@@ -503,17 +538,10 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F) {
BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
if (Term && !Term->isConditional()) {
Changed = true;
- DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n");
- // Remember if SinglePred was the entry block of the function.
- // If so, we will need to move BB back to the entry position.
- bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
- MergeBasicBlockIntoOnlyPred(BB, nullptr);
-
- if (isEntry && BB != &BB->getParent()->getEntryBlock())
- BB->moveBefore(&BB->getParent()->getEntryBlock());
+ LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
- // We have erased a block. Update the iterator.
- I = BB->getIterator();
+ // Merge BB into SinglePred and delete it.
+ MergeBlockIntoPredecessor(BB);
}
}
return Changed;
@@ -566,9 +594,17 @@ bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
}
bool MadeChange = false;
+ // Copy blocks into a temporary array to avoid iterator invalidation issues
+ // as we remove them.
// Note that this intentionally skips the entry block.
- for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
- BasicBlock *BB = &*I++;
+ SmallVector<WeakTrackingVH, 16> Blocks;
+ for (auto &Block : llvm::make_range(std::next(F.begin()), F.end()))
+ Blocks.push_back(&Block);
+
+ for (auto &Block : Blocks) {
+ BasicBlock *BB = cast_or_null<BasicBlock>(Block);
+ if (!BB)
+ continue;
BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
if (!DestBB ||
!isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
@@ -730,21 +766,20 @@ void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
BranchInst *BI = cast<BranchInst>(BB->getTerminator());
BasicBlock *DestBB = BI->getSuccessor(0);
- DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);
+ LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n"
+ << *BB << *DestBB);
// If the destination block has a single pred, then this is a trivial edge,
// just collapse it.
if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
if (SinglePred != DestBB) {
- // Remember if SinglePred was the entry block of the function. If so, we
- // will need to move BB back to the entry position.
- bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
- MergeBasicBlockIntoOnlyPred(DestBB, nullptr);
-
- if (isEntry && BB != &BB->getParent()->getEntryBlock())
- BB->moveBefore(&BB->getParent()->getEntryBlock());
-
- DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
+ assert(SinglePred == BB &&
+ "Single predecessor not the same as predecessor");
+ // Merge DestBB into SinglePred/BB and delete it.
+ MergeBlockIntoPredecessor(DestBB);
+ // Note: BB(=SinglePred) will not be deleted on this path.
+ // DestBB(=its single successor) is the one that was deleted.
+ LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n");
return;
}
}
@@ -782,7 +817,7 @@ void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
BB->eraseFromParent();
++NumBlocksElim;
- DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
+ LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
}
// Computes a map of base pointer relocation instructions to corresponding
@@ -1024,6 +1059,7 @@ static bool SinkCast(CastInst *CI) {
assert(InsertPt != UserBB->end());
InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0),
CI->getType(), "", &*InsertPt);
+ InsertedCast->setDebugLoc(CI->getDebugLoc());
}
// Replace a use of the cast with a use of the new cast.
@@ -1247,8 +1283,8 @@ static bool sinkAndCmp0Expression(Instruction *AndI,
if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))
return false;
- DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
- DEBUG(AndI->getParent()->dump());
+ LLVM_DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
+ LLVM_DEBUG(AndI->getParent()->dump());
// Push the 'and' into the same block as the icmp 0. There should only be
// one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
@@ -1261,7 +1297,7 @@ static bool sinkAndCmp0Expression(Instruction *AndI,
// Preincrement use iterator so we don't invalidate it.
++UI;
- DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
+ LLVM_DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
// Keep the 'and' in the same place if the use is already in the same block.
Instruction *InsertPt =
@@ -1275,7 +1311,7 @@ static bool sinkAndCmp0Expression(Instruction *AndI,
// Replace a use of the 'and' with a use of the new 'and'.
TheUse = InsertedAnd;
++NumAndUses;
- DEBUG(User->getParent()->dump());
+ LLVM_DEBUG(User->getParent()->dump());
}
// We removed all uses, nuke the and.
@@ -1388,7 +1424,7 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
/// %x.extract.shift.1 = lshr i64 %arg1, 32
/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
///
-/// CodeGen will recoginze the pattern in BB2 and generate BitExtract
+/// CodeGen will recognize the pattern in BB2 and generate BitExtract
/// instruction.
/// Return true if any changes are made.
static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
@@ -1434,7 +1470,7 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
// cmp i16 trunc.result, opnd2
//
if (isa<TruncInst>(User) && shiftIsLegal
- // If the type of the truncate is legal, no trucate will be
+ // If the type of the truncate is legal, no truncate will be
// introduced in other basic blocks.
&&
(!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
@@ -1581,7 +1617,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
// if size - offset meets the size threshold.
if (!Arg->getType()->isPointerTy())
continue;
- APInt Offset(DL->getPointerSizeInBits(
+ APInt Offset(DL->getIndexSizeInBits(
cast<PointerType>(Arg->getType())->getAddressSpace()),
0);
Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
@@ -1606,11 +1642,14 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
// If this is a memcpy (or similar) then we may be able to improve the
// alignment
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
- unsigned Align = getKnownAlignment(MI->getDest(), *DL);
- if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI))
- Align = std::min(Align, getKnownAlignment(MTI->getSource(), *DL));
- if (Align > MI->getAlignment())
- MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Align));
+ unsigned DestAlign = getKnownAlignment(MI->getDest(), *DL);
+ if (DestAlign > MI->getDestAlignment())
+ MI->setDestAlignment(DestAlign);
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
+ unsigned SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
+ if (SrcAlign > MTI->getSourceAlignment())
+ MTI->setSourceAlignment(SrcAlign);
+ }
}
}
@@ -1664,7 +1703,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
InsertedInsts.insert(ExtVal);
return true;
}
- case Intrinsic::invariant_group_barrier:
+ case Intrinsic::launder_invariant_group:
+ case Intrinsic::strip_invariant_group:
II->replaceAllUsesWith(II->getArgOperand(0));
II->eraseFromParent();
return true;
@@ -2018,11 +2058,11 @@ LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
namespace {
-/// \brief This class provides transaction based operation on the IR.
+/// This class provides transaction based operation on the IR.
/// Every change made through this class is recorded in the internal state and
/// can be undone (rollback) until commit is called.
class TypePromotionTransaction {
- /// \brief This represents the common interface of the individual transaction.
+ /// This represents the common interface of the individual transaction.
/// Each class implements the logic for doing one specific modification on
/// the IR via the TypePromotionTransaction.
class TypePromotionAction {
@@ -2031,20 +2071,20 @@ class TypePromotionTransaction {
Instruction *Inst;
public:
- /// \brief Constructor of the action.
+ /// Constructor of the action.
/// The constructor performs the related action on the IR.
TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
virtual ~TypePromotionAction() = default;
- /// \brief Undo the modification done by this action.
+ /// Undo the modification done by this action.
/// When this method is called, the IR must be in the same state as it was
/// before this action was applied.
/// \pre Undoing the action works if and only if the IR is in the exact same
/// state as it was directly after this action was applied.
virtual void undo() = 0;
- /// \brief Advocate every change made by this action.
+ /// Advocate every change made by this action.
/// When the results on the IR of the action are to be kept, it is important
/// to call this function, otherwise hidden information may be kept forever.
virtual void commit() {
@@ -2052,12 +2092,12 @@ class TypePromotionTransaction {
}
};
- /// \brief Utility to remember the position of an instruction.
+ /// Utility to remember the position of an instruction.
class InsertionHandler {
/// Position of an instruction.
/// Either an instruction:
/// - Is the first in a basic block: BB is used.
- /// - Has a previous instructon: PrevInst is used.
+ /// - Has a previous instruction: PrevInst is used.
union {
Instruction *PrevInst;
BasicBlock *BB;
@@ -2067,7 +2107,7 @@ class TypePromotionTransaction {
bool HasPrevInstruction;
public:
- /// \brief Record the position of \p Inst.
+ /// Record the position of \p Inst.
InsertionHandler(Instruction *Inst) {
BasicBlock::iterator It = Inst->getIterator();
HasPrevInstruction = (It != (Inst->getParent()->begin()));
@@ -2077,7 +2117,7 @@ class TypePromotionTransaction {
Point.BB = Inst->getParent();
}
- /// \brief Insert \p Inst at the recorded position.
+ /// Insert \p Inst at the recorded position.
void insert(Instruction *Inst) {
if (HasPrevInstruction) {
if (Inst->getParent())
@@ -2093,27 +2133,28 @@ class TypePromotionTransaction {
}
};
- /// \brief Move an instruction before another.
+ /// Move an instruction before another.
class InstructionMoveBefore : public TypePromotionAction {
/// Original position of the instruction.
InsertionHandler Position;
public:
- /// \brief Move \p Inst before \p Before.
+ /// Move \p Inst before \p Before.
InstructionMoveBefore(Instruction *Inst, Instruction *Before)
: TypePromotionAction(Inst), Position(Inst) {
- DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before << "\n");
+ LLVM_DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before
+ << "\n");
Inst->moveBefore(Before);
}
- /// \brief Move the instruction back to its original position.
+ /// Move the instruction back to its original position.
void undo() override {
- DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
+ LLVM_DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
Position.insert(Inst);
}
};
- /// \brief Set the operand of an instruction with a new value.
+ /// Set the operand of an instruction with a new value.
class OperandSetter : public TypePromotionAction {
/// Original operand of the instruction.
Value *Origin;
@@ -2122,35 +2163,35 @@ class TypePromotionTransaction {
unsigned Idx;
public:
- /// \brief Set \p Idx operand of \p Inst with \p NewVal.
+ /// Set \p Idx operand of \p Inst with \p NewVal.
OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
: TypePromotionAction(Inst), Idx(Idx) {
- DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
- << "for:" << *Inst << "\n"
- << "with:" << *NewVal << "\n");
+ LLVM_DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
+ << "for:" << *Inst << "\n"
+ << "with:" << *NewVal << "\n");
Origin = Inst->getOperand(Idx);
Inst->setOperand(Idx, NewVal);
}
- /// \brief Restore the original value of the instruction.
+ /// Restore the original value of the instruction.
void undo() override {
- DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
- << "for: " << *Inst << "\n"
- << "with: " << *Origin << "\n");
+ LLVM_DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
+ << "for: " << *Inst << "\n"
+ << "with: " << *Origin << "\n");
Inst->setOperand(Idx, Origin);
}
};
- /// \brief Hide the operands of an instruction.
+ /// Hide the operands of an instruction.
/// Do as if this instruction was not using any of its operands.
class OperandsHider : public TypePromotionAction {
/// The list of original operands.
SmallVector<Value *, 4> OriginalValues;
public:
- /// \brief Remove \p Inst from the uses of the operands of \p Inst.
+ /// Remove \p Inst from the uses of the operands of \p Inst.
OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
- DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
+ LLVM_DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
unsigned NumOpnds = Inst->getNumOperands();
OriginalValues.reserve(NumOpnds);
for (unsigned It = 0; It < NumOpnds; ++It) {
@@ -2164,114 +2205,114 @@ class TypePromotionTransaction {
}
}
- /// \brief Restore the original list of uses.
+ /// Restore the original list of uses.
void undo() override {
- DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
+ LLVM_DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
Inst->setOperand(It, OriginalValues[It]);
}
};
- /// \brief Build a truncate instruction.
+ /// Build a truncate instruction.
class TruncBuilder : public TypePromotionAction {
Value *Val;
public:
- /// \brief Build a truncate instruction of \p Opnd producing a \p Ty
+ /// Build a truncate instruction of \p Opnd producing a \p Ty
/// result.
/// trunc Opnd to Ty.
TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
IRBuilder<> Builder(Opnd);
Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
- DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
+ LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
}
- /// \brief Get the built value.
+ /// Get the built value.
Value *getBuiltValue() { return Val; }
- /// \brief Remove the built instruction.
+ /// Remove the built instruction.
void undo() override {
- DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
+ LLVM_DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
if (Instruction *IVal = dyn_cast<Instruction>(Val))
IVal->eraseFromParent();
}
};
- /// \brief Build a sign extension instruction.
+ /// Build a sign extension instruction.
class SExtBuilder : public TypePromotionAction {
Value *Val;
public:
- /// \brief Build a sign extension instruction of \p Opnd producing a \p Ty
+ /// Build a sign extension instruction of \p Opnd producing a \p Ty
/// result.
/// sext Opnd to Ty.
SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
: TypePromotionAction(InsertPt) {
IRBuilder<> Builder(InsertPt);
Val = Builder.CreateSExt(Opnd, Ty, "promoted");
- DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
+ LLVM_DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
}
- /// \brief Get the built value.
+ /// Get the built value.
Value *getBuiltValue() { return Val; }
- /// \brief Remove the built instruction.
+ /// Remove the built instruction.
void undo() override {
- DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
+ LLVM_DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
if (Instruction *IVal = dyn_cast<Instruction>(Val))
IVal->eraseFromParent();
}
};
- /// \brief Build a zero extension instruction.
+ /// Build a zero extension instruction.
class ZExtBuilder : public TypePromotionAction {
Value *Val;
public:
- /// \brief Build a zero extension instruction of \p Opnd producing a \p Ty
+ /// Build a zero extension instruction of \p Opnd producing a \p Ty
/// result.
/// zext Opnd to Ty.
ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
: TypePromotionAction(InsertPt) {
IRBuilder<> Builder(InsertPt);
Val = Builder.CreateZExt(Opnd, Ty, "promoted");
- DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
+ LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
}
- /// \brief Get the built value.
+ /// Get the built value.
Value *getBuiltValue() { return Val; }
- /// \brief Remove the built instruction.
+ /// Remove the built instruction.
void undo() override {
- DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
+ LLVM_DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
if (Instruction *IVal = dyn_cast<Instruction>(Val))
IVal->eraseFromParent();
}
};
- /// \brief Mutate an instruction to another type.
+ /// Mutate an instruction to another type.
class TypeMutator : public TypePromotionAction {
/// Record the original type.
Type *OrigTy;
public:
- /// \brief Mutate the type of \p Inst into \p NewTy.
+ /// Mutate the type of \p Inst into \p NewTy.
TypeMutator(Instruction *Inst, Type *NewTy)
: TypePromotionAction(Inst), OrigTy(Inst->getType()) {
- DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
- << "\n");
+ LLVM_DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
+ << "\n");
Inst->mutateType(NewTy);
}
- /// \brief Mutate the instruction back to its original type.
+ /// Mutate the instruction back to its original type.
void undo() override {
- DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
- << "\n");
+ LLVM_DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
+ << "\n");
Inst->mutateType(OrigTy);
}
};
- /// \brief Replace the uses of an instruction by another instruction.
+ /// Replace the uses of an instruction by another instruction.
class UsesReplacer : public TypePromotionAction {
/// Helper structure to keep track of the replaced uses.
struct InstructionAndIdx {
@@ -2291,10 +2332,10 @@ class TypePromotionTransaction {
using use_iterator = SmallVectorImpl<InstructionAndIdx>::iterator;
public:
- /// \brief Replace all the use of \p Inst by \p New.
+ /// Replace all the use of \p Inst by \p New.
UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) {
- DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
- << "\n");
+ LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
+ << "\n");
// Record the original uses.
for (Use &U : Inst->uses()) {
Instruction *UserI = cast<Instruction>(U.getUser());
@@ -2304,9 +2345,9 @@ class TypePromotionTransaction {
Inst->replaceAllUsesWith(New);
}
- /// \brief Reassign the original uses of Inst to Inst.
+ /// Reassign the original uses of Inst to Inst.
void undo() override {
- DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
+ LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
for (use_iterator UseIt = OriginalUses.begin(),
EndIt = OriginalUses.end();
UseIt != EndIt; ++UseIt) {
@@ -2315,7 +2356,7 @@ class TypePromotionTransaction {
}
};
- /// \brief Remove an instruction from the IR.
+ /// Remove an instruction from the IR.
class InstructionRemover : public TypePromotionAction {
/// Original position of the instruction.
InsertionHandler Inserter;
@@ -2331,7 +2372,7 @@ class TypePromotionTransaction {
SetOfInstrs &RemovedInsts;
public:
- /// \brief Remove all reference of \p Inst and optinally replace all its
+ /// Remove all reference of \p Inst and optionally replace all its
/// uses with New.
/// \p RemovedInsts Keep track of the instructions removed by this Action.
/// \pre If !Inst->use_empty(), then New != nullptr
@@ -2341,7 +2382,7 @@ class TypePromotionTransaction {
RemovedInsts(RemovedInsts) {
if (New)
Replacer = new UsesReplacer(Inst, New);
- DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
+ LLVM_DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
RemovedInsts.insert(Inst);
/// The instructions removed here will be freed after completing
/// optimizeBlock() for all blocks as we need to keep track of the
@@ -2351,10 +2392,10 @@ class TypePromotionTransaction {
~InstructionRemover() override { delete Replacer; }
- /// \brief Resurrect the instruction and reassign it to the proper uses if
+ /// Resurrect the instruction and reassign it to the proper uses if
/// new value was provided when build this action.
void undo() override {
- DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
+ LLVM_DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
Inserter.insert(Inst);
if (Replacer)
Replacer->undo();
@@ -2496,7 +2537,7 @@ void TypePromotionTransaction::rollback(
namespace {
-/// \brief A helper class for matching addressing modes.
+/// A helper class for matching addressing modes.
///
/// This encapsulates the logic for matching the target-legal addressing modes.
class AddressingModeMatcher {
@@ -2524,22 +2565,23 @@ class AddressingModeMatcher {
/// The ongoing transaction where every action should be registered.
TypePromotionTransaction &TPT;
+ // A GEP whose offset is too large to be folded into the addressing mode.
+ std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP;
+
/// This is set to true when we should not do profitability checks.
/// When true, IsProfitableToFoldIntoAddressingMode always returns true.
bool IgnoreProfitability;
- AddressingModeMatcher(SmallVectorImpl<Instruction *> &AMI,
- const TargetLowering &TLI,
- const TargetRegisterInfo &TRI,
- Type *AT, unsigned AS,
- Instruction *MI, ExtAddrMode &AM,
- const SetOfInstrs &InsertedInsts,
- InstrToOrigTy &PromotedInsts,
- TypePromotionTransaction &TPT)
+ AddressingModeMatcher(
+ SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
+ const TargetRegisterInfo &TRI, Type *AT, unsigned AS, Instruction *MI,
+ ExtAddrMode &AM, const SetOfInstrs &InsertedInsts,
+ InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
+ std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP)
: AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS),
MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts),
- PromotedInsts(PromotedInsts), TPT(TPT) {
+ PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP) {
IgnoreProfitability = false;
}
@@ -2551,28 +2593,27 @@ public:
/// optimizations.
/// \p PromotedInsts maps the instructions to their type before promotion.
/// \p The ongoing transaction where every action should be registered.
- static ExtAddrMode Match(Value *V, Type *AccessTy, unsigned AS,
- Instruction *MemoryInst,
- SmallVectorImpl<Instruction*> &AddrModeInsts,
- const TargetLowering &TLI,
- const TargetRegisterInfo &TRI,
- const SetOfInstrs &InsertedInsts,
- InstrToOrigTy &PromotedInsts,
- TypePromotionTransaction &TPT) {
+ static ExtAddrMode
+ Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
+ SmallVectorImpl<Instruction *> &AddrModeInsts,
+ const TargetLowering &TLI, const TargetRegisterInfo &TRI,
+ const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
+ TypePromotionTransaction &TPT,
+ std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP) {
ExtAddrMode Result;
- bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI,
- AccessTy, AS,
+ bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, AccessTy, AS,
MemoryInst, Result, InsertedInsts,
- PromotedInsts, TPT).matchAddr(V, 0);
+ PromotedInsts, TPT, LargeOffsetGEP)
+ .matchAddr(V, 0);
(void)Success; assert(Success && "Couldn't select *anything*?");
return Result;
}
private:
bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
- bool matchAddr(Value *V, unsigned Depth);
- bool matchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth,
+ bool matchAddr(Value *Addr, unsigned Depth);
+ bool matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth,
bool *MovedAway = nullptr);
bool isProfitableToFoldIntoAddressingMode(Instruction *I,
ExtAddrMode &AMBefore,
@@ -2582,20 +2623,21 @@ private:
Value *PromotedOperand) const;
};
-/// \brief Keep track of simplification of Phi nodes.
+/// Keep track of simplification of Phi nodes.
/// Accept the set of all phi nodes and erase phi node from this set
/// if it is simplified.
class SimplificationTracker {
DenseMap<Value *, Value *> Storage;
const SimplifyQuery &SQ;
- SmallPtrSetImpl<PHINode *> &AllPhiNodes;
- SmallPtrSetImpl<SelectInst *> &AllSelectNodes;
+ // Tracks newly created Phi nodes. We use a SetVector to get deterministic
+ // order when iterating over the set in MatchPhiSet.
+ SmallSetVector<PHINode *, 32> AllPhiNodes;
+ // Tracks newly created Select nodes.
+ SmallPtrSet<SelectInst *, 32> AllSelectNodes;
public:
- SimplificationTracker(const SimplifyQuery &sq,
- SmallPtrSetImpl<PHINode *> &APN,
- SmallPtrSetImpl<SelectInst *> &ASN)
- : SQ(sq), AllPhiNodes(APN), AllSelectNodes(ASN) {}
+ SimplificationTracker(const SimplifyQuery &sq)
+ : SQ(sq) {}
Value *Get(Value *V) {
do {
@@ -2621,7 +2663,7 @@ public:
Put(PI, V);
PI->replaceAllUsesWith(V);
if (auto *PHI = dyn_cast<PHINode>(PI))
- AllPhiNodes.erase(PHI);
+ AllPhiNodes.remove(PHI);
if (auto *Select = dyn_cast<SelectInst>(PI))
AllSelectNodes.erase(Select);
PI->eraseFromParent();
@@ -2633,9 +2675,48 @@ public:
void Put(Value *From, Value *To) {
Storage.insert({ From, To });
}
+
+ void ReplacePhi(PHINode *From, PHINode *To) {
+ Value* OldReplacement = Get(From);
+ while (OldReplacement != From) {
+ From = To;
+ To = dyn_cast<PHINode>(OldReplacement);
+ OldReplacement = Get(From);
+ }
+ assert(Get(To) == To && "Replacement PHI node is already replaced.");
+ Put(From, To);
+ From->replaceAllUsesWith(To);
+ AllPhiNodes.remove(From);
+ From->eraseFromParent();
+ }
+
+ SmallSetVector<PHINode *, 32>& newPhiNodes() { return AllPhiNodes; }
+
+ void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); }
+
+ void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(SI); }
+
+ unsigned countNewPhiNodes() const { return AllPhiNodes.size(); }
+
+ unsigned countNewSelectNodes() const { return AllSelectNodes.size(); }
+
+ void destroyNewNodes(Type *CommonType) {
+ // For safe erasing, replace the uses with dummy value first.
+ auto Dummy = UndefValue::get(CommonType);
+ for (auto I : AllPhiNodes) {
+ I->replaceAllUsesWith(Dummy);
+ I->eraseFromParent();
+ }
+ AllPhiNodes.clear();
+ for (auto I : AllSelectNodes) {
+ I->replaceAllUsesWith(Dummy);
+ I->eraseFromParent();
+ }
+ AllSelectNodes.clear();
+ }
};
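The AllPhiNodes change from SmallPtrSet to SmallSetVector is what makes the later MatchPhiSet/ReplacePhi walk deterministic: a pointer-keyed set iterates in whatever order the pointers hash to, while a SetVector iterates in insertion order. A minimal standalone sketch of the idea (plain C++, not LLVM's SetVector, and without the remove() support the patch also relies on):

#include <cstddef>
#include <unordered_set>
#include <vector>

// Insertion-ordered set: membership checks go through a hash set, but
// iteration walks a vector in insertion order, so traversal does not depend
// on pointer values or hashing and is stable from run to run.
template <typename T> class InsertionOrderedSet {
  std::unordered_set<T> Members;
  std::vector<T> Order;

public:
  bool insert(const T &V) {
    if (!Members.insert(V).second)
      return false; // already present
    Order.push_back(V);
    return true;
  }
  typename std::vector<T>::const_iterator begin() const { return Order.begin(); }
  typename std::vector<T>::const_iterator end() const { return Order.end(); }
  std::size_t size() const { return Order.size(); }
};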
-/// \brief A helper class for combining addressing modes.
+/// A helper class for combining addressing modes.
class AddressingModeCombiner {
typedef std::pair<Value *, BasicBlock *> ValueInBB;
typedef DenseMap<ValueInBB, Value *> FoldAddrToValueMapping;
@@ -2664,12 +2745,12 @@ public:
AddressingModeCombiner(const SimplifyQuery &_SQ, ValueInBB OriginalValue)
: CommonType(nullptr), SQ(_SQ), Original(OriginalValue) {}
- /// \brief Get the combined AddrMode
+ /// Get the combined AddrMode
const ExtAddrMode &getAddrMode() const {
return AddrModes[0];
}
- /// \brief Add a new AddrMode if it's compatible with the AddrModes we already
+ /// Add a new AddrMode if it's compatible with the AddrModes we already
/// have.
/// \return True iff we succeeded in doing so.
bool addNewAddrMode(ExtAddrMode &NewAddrMode) {
@@ -2694,29 +2775,35 @@ public:
else if (DifferentField != ThisDifferentField)
DifferentField = ExtAddrMode::MultipleFields;
- // If NewAddrMode differs in only one dimension, and that dimension isn't
- // the amount that ScaledReg is scaled by, then we can handle it by
- // inserting a phi/select later on. Even if NewAddMode is the same
- // we still need to collect it due to original value is different.
- // And later we will need all original values as anchors during
- // finding the common Phi node.
+ // If NewAddrMode differs in more than one dimension we cannot handle it.
+ bool CanHandle = DifferentField != ExtAddrMode::MultipleFields;
+
+ // If Scale Field is different then we reject.
+ CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField;
+
// We must also reject the case when the base offset is different and the
// scale reg is not null; we cannot handle this case because the merge of
// the different offsets would be used as the ScaleReg.
- if (DifferentField != ExtAddrMode::MultipleFields &&
- DifferentField != ExtAddrMode::ScaleField &&
- (DifferentField != ExtAddrMode::BaseOffsField ||
- !NewAddrMode.ScaledReg)) {
+ CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField ||
+ !NewAddrMode.ScaledReg);
+
+ // We must also reject the case when GV is different and BaseReg is set,
+ // because we want to use the base reg as a merge of the GV values.
+ CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField ||
+ !NewAddrMode.HasBaseReg);
+
+ // Even if NewAddrMode is the same we still need to collect it because the
+ // original value is different. And later we will need all original values
+ // as anchors when finding the common Phi node.
+ if (CanHandle)
AddrModes.emplace_back(NewAddrMode);
- return true;
- }
+ else
+ AddrModes.clear();
- // We couldn't combine NewAddrMode with the rest, so return failure.
- AddrModes.clear();
- return false;
+ return CanHandle;
}
- /// \brief Combine the addressing modes we've collected into a single
+ /// Combine the addressing modes we've collected into a single
/// addressing mode.
/// \return True iff we successfully combined them or we only had one so
/// didn't need to combine them anyway.
@@ -2751,7 +2838,7 @@ public:
}
private:
- /// \brief Initialize Map with anchor values. For address seen in some BB
+ /// Initialize Map with anchor values. For address seen in some BB
/// we set the value of the different field seen in this address.
/// If the address is not an instruction then the basic block is set to null.
/// At the same time we find a common type for different field we will
@@ -2784,9 +2871,9 @@ private:
return true;
}
- /// \brief We have mapping between value A and basic block where value A
+ /// We have mapping between value A and basic block where value A
/// seen to other value B where B was a field in addressing mode represented
- /// by A. Also we have an original value C representin an address in some
+ /// by A. Also we have an original value C representing an address in some
/// basic block. Traversing from C through phi and selects we ended up with
/// A's in a map. This utility function tries to find a value V which is a
/// field in addressing mode C and traversing through phi nodes and selects
@@ -2809,62 +2896,46 @@ private:
// <p, BB3> -> ?
// The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3
Value *findCommon(FoldAddrToValueMapping &Map) {
- // Tracks of new created Phi nodes.
- SmallPtrSet<PHINode *, 32> NewPhiNodes;
- // Tracks of new created Select nodes.
- SmallPtrSet<SelectInst *, 32> NewSelectNodes;
- // Tracks the simplification of new created phi nodes. The reason we use
+ // Tracks the simplification of newly created phi nodes. The reason we use
// this mapping is because we will add new created Phi nodes in AddrToBase.
// Simplification of Phi nodes is recursive, so some Phi node may
// be simplified after we added it to AddrToBase.
// Using this mapping we can find the current value in AddrToBase.
- SimplificationTracker ST(SQ, NewPhiNodes, NewSelectNodes);
+ SimplificationTracker ST(SQ);
// First step, DFS to create PHI nodes for all intermediate blocks.
// Also fill traverse order for the second step.
SmallVector<ValueInBB, 32> TraverseOrder;
- InsertPlaceholders(Map, TraverseOrder, NewPhiNodes, NewSelectNodes);
+ InsertPlaceholders(Map, TraverseOrder, ST);
// Second Step, fill new nodes by merged values and simplify if possible.
FillPlaceholders(Map, TraverseOrder, ST);
- if (!AddrSinkNewSelects && NewSelectNodes.size() > 0) {
- DestroyNodes(NewPhiNodes);
- DestroyNodes(NewSelectNodes);
+ if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) {
+ ST.destroyNewNodes(CommonType);
return nullptr;
}
// Now we'd like to match New Phi nodes to existed ones.
unsigned PhiNotMatchedCount = 0;
- if (!MatchPhiSet(NewPhiNodes, ST, AddrSinkNewPhis, PhiNotMatchedCount)) {
- DestroyNodes(NewPhiNodes);
- DestroyNodes(NewSelectNodes);
+ if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) {
+ ST.destroyNewNodes(CommonType);
return nullptr;
}
auto *Result = ST.Get(Map.find(Original)->second);
if (Result) {
- NumMemoryInstsPhiCreated += NewPhiNodes.size() + PhiNotMatchedCount;
- NumMemoryInstsSelectCreated += NewSelectNodes.size();
+ NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount;
+ NumMemoryInstsSelectCreated += ST.countNewSelectNodes();
}
return Result;
}
- /// \brief Destroy nodes from a set.
- template <typename T> void DestroyNodes(SmallPtrSetImpl<T *> &Instructions) {
- // For safe erasing, replace the Phi with dummy value first.
- auto Dummy = UndefValue::get(CommonType);
- for (auto I : Instructions) {
- I->replaceAllUsesWith(Dummy);
- I->eraseFromParent();
- }
- }
-
- /// \brief Try to match PHI node to Candidate.
+ /// Try to match PHI node to Candidate.
/// Matcher tracks the matched Phi nodes.
bool MatchPhiNode(PHINode *PHI, PHINode *Candidate,
- DenseSet<PHIPair> &Matcher,
- SmallPtrSetImpl<PHINode *> &PhiNodesToMatch) {
+ SmallSetVector<PHIPair, 8> &Matcher,
+ SmallSetVector<PHINode *, 32> &PhiNodesToMatch) {
SmallVector<PHIPair, 8> WorkList;
Matcher.insert({ PHI, Candidate });
WorkList.push_back({ PHI, Candidate });
@@ -2908,13 +2979,16 @@ private:
return true;
}
- /// \brief For the given set of PHI nodes try to find their equivalents.
+ /// For the given set of PHI nodes (in the SimplificationTracker) try
+ /// to find their equivalents.
/// Returns false if this matching fails and creation of new Phi is disabled.
- bool MatchPhiSet(SmallPtrSetImpl<PHINode *> &PhiNodesToMatch,
- SimplificationTracker &ST, bool AllowNewPhiNodes,
+ bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes,
unsigned &PhiNotMatchedCount) {
- DenseSet<PHIPair> Matched;
+ // Use a SetVector for Matched to make sure we do replacements (ReplacePhi)
+ // in a deterministic order below.
+ SmallSetVector<PHIPair, 8> Matched;
SmallPtrSet<PHINode *, 8> WillNotMatch;
+ SmallSetVector<PHINode *, 32> &PhiNodesToMatch = ST.newPhiNodes();
while (PhiNodesToMatch.size()) {
PHINode *PHI = *PhiNodesToMatch.begin();
@@ -2938,12 +3012,8 @@ private:
}
if (IsMatched) {
// Replace all matched values and erase them.
- for (auto MV : Matched) {
- MV.first->replaceAllUsesWith(MV.second);
- PhiNodesToMatch.erase(MV.first);
- ST.Put(MV.first, MV.second);
- MV.first->eraseFromParent();
- }
+ for (auto MV : Matched)
+ ST.ReplacePhi(MV.first, MV.second);
Matched.clear();
continue;
}
@@ -2953,11 +3023,11 @@ private:
// Just remove all seen values in matcher. They will not match anything.
PhiNotMatchedCount += WillNotMatch.size();
for (auto *P : WillNotMatch)
- PhiNodesToMatch.erase(P);
+ PhiNodesToMatch.remove(P);
}
return true;
}
- /// \brief Fill the placeholder with values from predecessors and simplify it.
+ /// Fill the placeholder with values from predecessors and simplify it.
void FillPlaceholders(FoldAddrToValueMapping &Map,
SmallVectorImpl<ValueInBB> &TraverseOrder,
SimplificationTracker &ST) {
@@ -3011,8 +3081,7 @@ private:
/// Also reports the order in which basic blocks have been traversed.
void InsertPlaceholders(FoldAddrToValueMapping &Map,
SmallVectorImpl<ValueInBB> &TraverseOrder,
- SmallPtrSetImpl<PHINode *> &NewPhiNodes,
- SmallPtrSetImpl<SelectInst *> &NewSelectNodes) {
+ SimplificationTracker &ST) {
SmallVector<ValueInBB, 32> Worklist;
assert((isa<PHINode>(Original.first) || isa<SelectInst>(Original.first)) &&
"Address must be a Phi or Select node");
@@ -3038,8 +3107,7 @@ private:
Instruction *CurrentI = cast<Instruction>(CurrentValue);
bool IsDefinedInThisBB = CurrentI->getParent() == CurrentBlock;
- unsigned PredCount =
- std::distance(pred_begin(CurrentBlock), pred_end(CurrentBlock));
+ unsigned PredCount = pred_size(CurrentBlock);
// if Current Value is not defined in this basic block we are interested
// in values in predecessors.
if (!IsDefinedInThisBB) {
@@ -3047,7 +3115,7 @@ private:
PHINode *PHI = PHINode::Create(CommonType, PredCount, "sunk_phi",
&CurrentBlock->front());
Map[Current] = PHI;
- NewPhiNodes.insert(PHI);
+ ST.insertNewPhi(PHI);
// Add all predecessors in work list.
for (auto B : predecessors(CurrentBlock))
Worklist.push_back({ CurrentValue, B });
@@ -3061,7 +3129,7 @@ private:
SelectInst::Create(OrigSelect->getCondition(), Dummy, Dummy,
OrigSelect->getName(), OrigSelect, OrigSelect);
Map[Current] = Select;
- NewSelectNodes.insert(Select);
+ ST.insertNewSelect(Select);
// We are interested in True and False value in this basic block.
Worklist.push_back({ OrigSelect->getTrueValue(), CurrentBlock });
Worklist.push_back({ OrigSelect->getFalseValue(), CurrentBlock });
@@ -3073,7 +3141,7 @@ private:
PHINode *PHI = PHINode::Create(CommonType, PredCount, "sunk_phi",
&CurrentBlock->front());
Map[Current] = PHI;
- NewPhiNodes.insert(PHI);
+ ST.insertNewPhi(PHI);
// Add all predecessors in work list.
for (auto B : predecessors(CurrentBlock))
@@ -3167,7 +3235,7 @@ static bool MightBeFoldableInst(Instruction *I) {
// Don't touch identity bitcasts.
if (I->getType() == I->getOperand(0)->getType())
return false;
- return I->getType()->isPointerTy() || I->getType()->isIntegerTy();
+ return I->getType()->isIntOrPtrTy();
case Instruction::PtrToInt:
// PtrToInt is always a noop, as we know that the int type is pointer sized.
return true;
@@ -3187,7 +3255,7 @@ static bool MightBeFoldableInst(Instruction *I) {
}
}
-/// \brief Check whether or not \p Val is a legal instruction for \p TLI.
+/// Check whether or not \p Val is a legal instruction for \p TLI.
/// \note \p Val is assumed to be the product of some type promotion.
/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
/// to be legal, as the non-promoted value would have had the same state.
@@ -3207,9 +3275,9 @@ static bool isPromotedInstructionLegal(const TargetLowering &TLI,
namespace {
-/// \brief Hepler class to perform type promotion.
+/// Helper class to perform type promotion.
class TypePromotionHelper {
- /// \brief Utility function to check whether or not a sign or zero extension
+ /// Utility function to check whether or not a sign or zero extension
/// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
/// either using the operands of \p Inst or promoting \p Inst.
/// The type of the extension is defined by \p IsSExt.
@@ -3223,13 +3291,13 @@ class TypePromotionHelper {
static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
const InstrToOrigTy &PromotedInsts, bool IsSExt);
- /// \brief Utility function to determine if \p OpIdx should be promoted when
+ /// Utility function to determine if \p OpIdx should be promoted when
/// promoting \p Inst.
static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
return !(isa<SelectInst>(Inst) && OpIdx == 0);
}
- /// \brief Utility function to promote the operand of \p Ext when this
+ /// Utility function to promote the operand of \p Ext when this
/// operand is a promotable trunc or sext or zext.
/// \p PromotedInsts maps the instructions to their type before promotion.
/// \p CreatedInstsCost[out] contains the cost of all instructions
@@ -3244,7 +3312,7 @@ class TypePromotionHelper {
SmallVectorImpl<Instruction *> *Exts,
SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
- /// \brief Utility function to promote the operand of \p Ext when this
+ /// Utility function to promote the operand of \p Ext when this
/// operand is promotable and is not a supported trunc or sext.
/// \p PromotedInsts maps the instructions to their type before promotion.
/// \p CreatedInstsCost[out] contains the cost of all the instructions
@@ -3290,7 +3358,7 @@ public:
SmallVectorImpl<Instruction *> *Truncs,
const TargetLowering &TLI);
- /// \brief Given a sign/zero extend instruction \p Ext, return the approriate
+ /// Given a sign/zero extend instruction \p Ext, return the appropriate
/// action to promote the operand of \p Ext instead of using Ext.
/// \return NULL if no promotable action is possible with the current
/// sign extension.
@@ -3332,6 +3400,47 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
(IsSExt && BinOp->hasNoSignedWrap())))
return true;
+ // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst))
+ if ((Inst->getOpcode() == Instruction::And ||
+ Inst->getOpcode() == Instruction::Or))
+ return true;
+
+ // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst))
+ if (Inst->getOpcode() == Instruction::Xor) {
+ const ConstantInt *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1));
+ // Make sure it is not a NOT.
+ if (Cst && !Cst->getValue().isAllOnesValue())
+ return true;
+ }
+
+ // zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst))
+ // It may change a poisoned value into a regular value, like
+ // zext i32 (shrl i8 %val, 12) --> shrl i32 (zext i8 %val), 12
+ // poisoned value regular value
+ // It should be OK since undef covers valid value.
+ if (Inst->getOpcode() == Instruction::LShr && !IsSExt)
+ return true;
+
+ // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst)
+ // It may change a poisoned value into a regular value, like
+ // zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12
+ // poisoned value regular value
+ // It should be OK since undef covers valid value.
+ if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) {
+ const Instruction *ExtInst =
+ dyn_cast<const Instruction>(*Inst->user_begin());
+ if (ExtInst->hasOneUse()) {
+ const Instruction *AndInst =
+ dyn_cast<const Instruction>(*ExtInst->user_begin());
+ if (AndInst && AndInst->getOpcode() == Instruction::And) {
+ const ConstantInt *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
+ if (Cst &&
+ Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth()))
+ return true;
+ }
+ }
+ }
+
// Check if we can do the following simplification.
// ext(trunc(opnd)) --> ext(opnd)
if (!isa<TruncInst>(Inst))
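The new And/Or (and non-NOT Xor) cases above rely on the extension distributing over the bitwise operation, i.e. ext(a op c) == ext(a) op ext(c). A quick standalone check of the zext flavour of that identity over all i8 values (plain C++, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t C = 0x5c; // arbitrary constant operand
  for (unsigned A = 0; A < 256; ++A) {
    uint8_t X = static_cast<uint8_t>(A);
    // zext(x & c) == zext(x) & zext(c)
    assert(uint32_t(uint8_t(X & C)) == (uint32_t(X) & uint32_t(C)));
    // zext(x | c) == zext(x) | zext(c)
    assert(uint32_t(uint8_t(X | C)) == (uint32_t(X) | uint32_t(C)));
  }
  return 0;
}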
@@ -3496,19 +3605,19 @@ Value *TypePromotionHelper::promoteOperandForOther(
// Step #3.
Instruction *ExtForOpnd = Ext;
- DEBUG(dbgs() << "Propagate Ext to operands\n");
+ LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n");
for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
++OpIdx) {
- DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
+ LLVM_DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
!shouldExtOperand(ExtOpnd, OpIdx)) {
- DEBUG(dbgs() << "No need to propagate\n");
+ LLVM_DEBUG(dbgs() << "No need to propagate\n");
continue;
}
// Check if we can statically extend the operand.
Value *Opnd = ExtOpnd->getOperand(OpIdx);
if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
- DEBUG(dbgs() << "Statically extend\n");
+ LLVM_DEBUG(dbgs() << "Statically extend\n");
unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
: Cst->getValue().zext(BitWidth);
@@ -3517,16 +3626,16 @@ Value *TypePromotionHelper::promoteOperandForOther(
}
// UndefValue are typed, so we have to statically sign extend them.
if (isa<UndefValue>(Opnd)) {
- DEBUG(dbgs() << "Statically extend\n");
+ LLVM_DEBUG(dbgs() << "Statically extend\n");
TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
continue;
}
- // Otherwise we have to explicity sign extend the operand.
+ // Otherwise we have to explicitly sign extend the operand.
// Check if Ext was reused to extend an operand.
if (!ExtForOpnd) {
// If yes, create a new one.
- DEBUG(dbgs() << "More operands to ext\n");
+ LLVM_DEBUG(dbgs() << "More operands to ext\n");
Value *ValForExtOpnd = IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType())
: TPT.createZExt(Ext, Opnd, Ext->getType());
if (!isa<Instruction>(ValForExtOpnd)) {
@@ -3547,7 +3656,7 @@ Value *TypePromotionHelper::promoteOperandForOther(
ExtForOpnd = nullptr;
}
if (ExtForOpnd == Ext) {
- DEBUG(dbgs() << "Extension is useless now\n");
+ LLVM_DEBUG(dbgs() << "Extension is useless now\n");
TPT.eraseInstruction(Ext);
}
return ExtOpnd;
@@ -3563,7 +3672,8 @@ Value *TypePromotionHelper::promoteOperandForOther(
/// \return True if the promotion is profitable, false otherwise.
bool AddressingModeMatcher::isPromotionProfitable(
unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
- DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost << '\n');
+ LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost
+ << '\n');
// The cost of the new extensions is greater than the cost of the
// old extension plus what we folded.
// This is not profitable.
@@ -3613,8 +3723,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
case Instruction::BitCast:
// BitCast is always a noop, and we can handle it as long as it is
// int->int or pointer->pointer (we don't want int<->fp or something).
- if ((AddrInst->getOperand(0)->getType()->isPointerTy() ||
- AddrInst->getOperand(0)->getType()->isIntegerTy()) &&
+ if (AddrInst->getOperand(0)->getType()->isIntOrPtrTy() &&
// Don't touch identity bitcasts. These were probably put here by LSR,
// and we don't want to mess around with them. Assume it knows what it
// is doing.
@@ -3714,6 +3823,30 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
// Check to see if we can fold the base pointer in too.
if (matchAddr(AddrInst->getOperand(0), Depth+1))
return true;
+ } else if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
+ TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
+ ConstantOffset > 0) {
+ // Record GEPs with non-zero offsets as candidates for splitting in the
+ // event that the offset cannot fit into the r+i addressing mode.
+ // This handles the simple and common case in which only one GEP is used
+ // in calculating the address for the memory access.
+ Value *Base = AddrInst->getOperand(0);
+ auto *BaseI = dyn_cast<Instruction>(Base);
+ auto *GEP = cast<GetElementPtrInst>(AddrInst);
+ if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
+ (BaseI && !isa<CastInst>(BaseI) &&
+ !isa<GetElementPtrInst>(BaseI))) {
+ // If the base is an instruction, make sure the GEP is not in the same
+ // basic block as the base. If the base is an argument or global
+ // value, make sure the GEP is not in the entry block. Otherwise,
+ // instruction selection can undo the split. Also make sure the
+ // parent block allows inserting non-PHI instructions before the
+ // terminator.
+ BasicBlock *Parent =
+ BaseI ? BaseI->getParent() : &GEP->getFunction()->getEntryBlock();
+ if (GEP->getParent() != Parent && !Parent->getTerminator()->isEHPad())
+ LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
+ }
}
AddrMode.BaseOffs -= ConstantOffset;
return false;
@@ -3810,7 +3943,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
PromotedOperand)) {
AddrMode = BackupAddrMode;
AddrModeInsts.resize(OldSize);
- DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
+ LLVM_DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
TPT.rollback(LastKnownGood);
return false;
}
@@ -4124,12 +4257,13 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
// will tell us if the addressing mode for the memory operation will
// *actually* cover the shared instruction.
ExtAddrMode Result;
+ std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
+ 0);
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
- AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI,
- AddressAccessTy, AS,
- MemoryInst, Result, InsertedInsts,
- PromotedInsts, TPT);
+ AddressingModeMatcher Matcher(
+ MatchedAddrModeInsts, TLI, TRI, AddressAccessTy, AS, MemoryInst, Result,
+ InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP);
Matcher.IgnoreProfitability = true;
bool Success = Matcher.matchAddr(Address, 0);
(void)Success; assert(Success && "Couldn't select *anything*?");
@@ -4231,11 +4365,24 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// the result may differ depending on what other uses our candidate
// addressing instructions might have.
AddrModeInsts.clear();
+ std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
+ 0);
ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI,
- InsertedInsts, PromotedInsts, TPT);
- NewAddrMode.OriginalValue = V;
+ InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP);
+
+ GetElementPtrInst *GEP = LargeOffsetGEP.first;
+ if (GEP && GEP->getParent() != MemoryInst->getParent() &&
+ !NewGEPBases.count(GEP)) {
+ // If splitting the underlying data structure can reduce the offset of a
+ // GEP, collect the GEP. Skip the GEPs that are the new bases of
+ // previously split data structures.
+ LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP);
+ if (LargeOffsetGEPID.find(GEP) == LargeOffsetGEPID.end())
+ LargeOffsetGEPID[GEP] = LargeOffsetGEPID.size();
+ }
+ NewAddrMode.OriginalValue = V;
if (!AddrModes.addNewAddrMode(NewAddrMode))
break;
}
@@ -4259,7 +4406,8 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) {
return IsNonLocalValue(V, MemoryInst->getParent());
})) {
- DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n");
+ LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode
+ << "\n");
return false;
}
@@ -4278,17 +4426,16 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
Value * SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
if (SunkAddr) {
- DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
- << *MemoryInst << "\n");
+ LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
+ << " for " << *MemoryInst << "\n");
if (SunkAddr->getType() != Addr->getType())
SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
} else if (AddrSinkUsingGEPs ||
- (!AddrSinkUsingGEPs.getNumOccurrences() && TM &&
- SubtargetInfo->useAA())) {
+ (!AddrSinkUsingGEPs.getNumOccurrences() && TM && TTI->useAA())) {
// By default, we use the GEP-based method when AA is used later. This
// prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
- DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
- << *MemoryInst << "\n");
+ LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
+ << " for " << *MemoryInst << "\n");
Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
Value *ResultPtr = nullptr, *ResultIndex = nullptr;
@@ -4427,8 +4574,8 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
DL->isNonIntegralPointerType(AddrMode.BaseGV->getType())))
return false;
- DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
- << *MemoryInst << "\n");
+ LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
+ << " for " << *MemoryInst << "\n");
Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
Value *Result = nullptr;
@@ -4554,7 +4701,7 @@ bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
return MadeChange;
}
-/// \brief Check if all the uses of \p Val are equivalent (or free) zero or
+/// Check if all the uses of \p Val are equivalent (or free) zero or
/// sign extensions.
static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
assert(!Val->use_empty() && "Input must have at least one use");
@@ -4602,7 +4749,7 @@ static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
return true;
}
-/// \brief Try to speculatively promote extensions in \p Exts and continue
+/// Try to speculatively promote extensions in \p Exts and continue
/// promoting through newly promoted operands recursively as far as doing so is
/// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts.
/// When some promotion happened, \p TPT contains the proper state to revert
@@ -4728,7 +4875,7 @@ bool CodeGenPrepare::mergeSExts(Function &F) {
}
if (!DT.dominates(Pt, Inst))
// Give up if we need to merge in a common dominator as the
- // expermients show it is not profitable.
+ // experiments show it is not profitable.
continue;
Inst->replaceAllUsesWith(Pt);
RemovedInsts.insert(Inst);
@@ -4744,6 +4891,154 @@ bool CodeGenPrepare::mergeSExts(Function &F) {
return Changed;
}
+// Splitting large data structures so that the GEPs accessing them can have
+// smaller offsets so that they can be sunk to the same blocks as their users.
+// For example, a large struct starting from %base is split into two parts
+// where the second part starts from %new_base.
+//
+// Before:
+// BB0:
+// %base =
+//
+// BB1:
+// %gep0 = gep %base, off0
+// %gep1 = gep %base, off1
+// %gep2 = gep %base, off2
+//
+// BB2:
+// %load1 = load %gep0
+// %load2 = load %gep1
+// %load3 = load %gep2
+//
+// After:
+// BB0:
+// %base =
+// %new_base = gep %base, off0
+//
+// BB1:
+// %new_gep0 = %new_base
+// %new_gep1 = gep %new_base, off1 - off0
+// %new_gep2 = gep %new_base, off2 - off0
+//
+// BB2:
+// %load1 = load i32, i32* %new_gep0
+// %load2 = load i32, i32* %new_gep1
+// %load3 = load i32, i32* %new_gep2
+//
+// %new_gep1 and %new_gep2 can now be sunk to BB2 after the splitting because
+// their offsets are small enough to fit into the addressing mode.
+bool CodeGenPrepare::splitLargeGEPOffsets() {
+ bool Changed = false;
+ for (auto &Entry : LargeOffsetGEPMap) {
+ Value *OldBase = Entry.first;
+ SmallVectorImpl<std::pair<AssertingVH<GetElementPtrInst>, int64_t>>
+ &LargeOffsetGEPs = Entry.second;
+ auto compareGEPOffset =
+ [&](const std::pair<GetElementPtrInst *, int64_t> &LHS,
+ const std::pair<GetElementPtrInst *, int64_t> &RHS) {
+ if (LHS.first == RHS.first)
+ return false;
+ if (LHS.second != RHS.second)
+ return LHS.second < RHS.second;
+ return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first];
+ };
+ // Sorting all the GEPs of the same data structures based on the offsets.
+ llvm::sort(LargeOffsetGEPs.begin(), LargeOffsetGEPs.end(),
+ compareGEPOffset);
+ LargeOffsetGEPs.erase(
+ std::unique(LargeOffsetGEPs.begin(), LargeOffsetGEPs.end()),
+ LargeOffsetGEPs.end());
+ // Skip if all the GEPs have the same offsets.
+ if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second)
+ continue;
+ GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first;
+ int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
+ Value *NewBaseGEP = nullptr;
+
+ auto LargeOffsetGEP = LargeOffsetGEPs.begin();
+ while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
+ GetElementPtrInst *GEP = LargeOffsetGEP->first;
+ int64_t Offset = LargeOffsetGEP->second;
+ if (Offset != BaseOffset) {
+ TargetLowering::AddrMode AddrMode;
+ AddrMode.BaseOffs = Offset - BaseOffset;
+ // The result type of the GEP might not be the type of the memory
+ // access.
+ if (!TLI->isLegalAddressingMode(*DL, AddrMode,
+ GEP->getResultElementType(),
+ GEP->getAddressSpace())) {
+ // We need to create a new base if the offset to the current base is
+ // too large to fit into the addressing mode. So, a very large struct
+ // may be split into several parts.
+ BaseGEP = GEP;
+ BaseOffset = Offset;
+ NewBaseGEP = nullptr;
+ }
+ }
+
+ // Generate a new GEP to replace the current one.
+ IRBuilder<> Builder(GEP);
+ Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
+ Type *I8PtrTy =
+ Builder.getInt8PtrTy(GEP->getType()->getPointerAddressSpace());
+ Type *I8Ty = Builder.getInt8Ty();
+
+ if (!NewBaseGEP) {
+ // Create a new base if we don't have one yet. Find the insertion
+ // pointer for the new base first.
+ BasicBlock::iterator NewBaseInsertPt;
+ BasicBlock *NewBaseInsertBB;
+ if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
+ // If the base of the struct is an instruction, the new base will be
+ // inserted close to it.
+ NewBaseInsertBB = BaseI->getParent();
+ if (isa<PHINode>(BaseI))
+ NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
+ else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
+ NewBaseInsertBB =
+ SplitEdge(NewBaseInsertBB, Invoke->getNormalDest());
+ NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
+ } else
+ NewBaseInsertPt = std::next(BaseI->getIterator());
+ } else {
+ // If the current base is an argument or global value, the new base
+ // will be inserted to the entry block.
+ NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
+ NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
+ }
+ IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
+ // Create a new base.
+ Value *BaseIndex = ConstantInt::get(IntPtrTy, BaseOffset);
+ NewBaseGEP = OldBase;
+ if (NewBaseGEP->getType() != I8PtrTy)
+ NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
+ NewBaseGEP =
+ NewBaseBuilder.CreateGEP(I8Ty, NewBaseGEP, BaseIndex, "splitgep");
+ NewGEPBases.insert(NewBaseGEP);
+ }
+
+ Value *NewGEP = NewBaseGEP;
+ if (Offset == BaseOffset) {
+ if (GEP->getType() != I8PtrTy)
+ NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
+ } else {
+ // Calculate the new offset for the new GEP.
+ Value *Index = ConstantInt::get(IntPtrTy, Offset - BaseOffset);
+ NewGEP = Builder.CreateGEP(I8Ty, NewBaseGEP, Index);
+
+ if (GEP->getType() != I8PtrTy)
+ NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
+ }
+ GEP->replaceAllUsesWith(NewGEP);
+ LargeOffsetGEPID.erase(GEP);
+ LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP);
+ GEP->eraseFromParent();
+ Changed = true;
+ }
+ }
+ return Changed;
+}
+
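Stripped of the IR plumbing, the per-base walk above is a greedy rebasing over the sorted offsets: keep the current base while the relative offset is still legal, otherwise start a new base. A minimal standalone sketch of just that decision (plain C++, not LLVM code; MaxImm stands in for whatever isLegalAddressingMode would accept):

#include <cstdint>
#include <utility>
#include <vector>

// Returns, for each sorted offset, the index of the base it is rebased on
// and its offset relative to that base. A new base starts whenever the
// distance from the current base no longer fits the addressing mode.
std::vector<std::pair<std::size_t, int64_t>>
rebaseOffsets(const std::vector<int64_t> &Offsets /* sorted */, int64_t MaxImm) {
  std::vector<std::pair<std::size_t, int64_t>> Rebased;
  std::size_t BaseIdx = 0;
  int64_t BaseOff = Offsets.empty() ? 0 : Offsets.front();
  for (std::size_t I = 0; I < Offsets.size(); ++I) {
    if (Offsets[I] - BaseOff > MaxImm) { // mirrors the BaseGEP/BaseOffset reset
      BaseIdx = I;
      BaseOff = Offsets[I];
    }
    Rebased.emplace_back(BaseIdx, Offsets[I] - BaseOff);
  }
  return Rebased;
}

For example, with sorted offsets {0, 8, 4096, 4104} and MaxImm = 4095, the last two offsets get a new base at index 2 and are rebased to 0 and 8.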
/// Return true, if an ext(load) can be formed from an extension in
/// \p MovedExts.
bool CodeGenPrepare::canFormExtLd(
@@ -5053,8 +5348,7 @@ bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
// x = phi x1', x2'
// y = and x, 0xff
bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
- if (!Load->isSimple() ||
- !(Load->getType()->isIntegerTy() || Load->getType()->isPointerTy()))
+ if (!Load->isSimple() || !Load->getType()->isIntOrPtrTy())
return false;
// Skip loads we've already transformed.
@@ -5519,7 +5813,7 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
namespace {
-/// \brief Helper class to promote a scalar operation to a vector one.
+/// Helper class to promote a scalar operation to a vector one.
/// This class is used to move downward extractelement transition.
/// E.g.,
/// a = vector_op <2 x i32>
@@ -5556,7 +5850,7 @@ class VectorPromoteHelper {
/// Instruction that will be combined with the transition.
Instruction *CombineInst = nullptr;
- /// \brief The instruction that represents the current end of the transition.
+ /// The instruction that represents the current end of the transition.
/// Since we are faking the promotion until we reach the end of the chain
/// of computation, we need a way to get the current end of the transition.
Instruction *getEndOfTransition() const {
@@ -5565,7 +5859,7 @@ class VectorPromoteHelper {
return InstsToBePromoted.back();
}
- /// \brief Return the index of the original value in the transition.
+ /// Return the index of the original value in the transition.
/// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
/// c, is at index 0.
unsigned getTransitionOriginalValueIdx() const {
@@ -5574,7 +5868,7 @@ class VectorPromoteHelper {
return 0;
}
- /// \brief Return the index of the index in the transition.
+ /// Return the index of the index in the transition.
/// E.g., for "extractelement <2 x i32> c, i32 0" the index
/// is at index 1.
unsigned getTransitionIdx() const {
@@ -5583,7 +5877,7 @@ class VectorPromoteHelper {
return 1;
}
- /// \brief Get the type of the transition.
+ /// Get the type of the transition.
/// This is the type of the original value.
/// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
/// transition is <2 x i32>.
@@ -5591,7 +5885,7 @@ class VectorPromoteHelper {
return Transition->getOperand(getTransitionOriginalValueIdx())->getType();
}
- /// \brief Promote \p ToBePromoted by moving \p Def downward through.
+ /// Promote \p ToBePromoted by moving \p Def downward through.
/// I.e., we have the following sequence:
/// Def = Transition <ty1> a to <ty2>
/// b = ToBePromoted <ty2> Def, ...
@@ -5600,7 +5894,7 @@ class VectorPromoteHelper {
/// Def = Transition <ty1> ToBePromoted to <ty2>
void promoteImpl(Instruction *ToBePromoted);
- /// \brief Check whether or not it is profitable to promote all the
+ /// Check whether or not it is profitable to promote all the
/// instructions enqueued to be promoted.
bool isProfitableToPromote() {
Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
@@ -5646,12 +5940,13 @@ class VectorPromoteHelper {
VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
Arg0OVK, Arg1OVK);
}
- DEBUG(dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
- << ScalarCost << "\nVector: " << VectorCost << '\n');
+ LLVM_DEBUG(
+ dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
+ << ScalarCost << "\nVector: " << VectorCost << '\n');
return ScalarCost > VectorCost;
}
- /// \brief Generate a constant vector with \p Val with the same
+ /// Generate a constant vector with \p Val with the same
/// number of elements as the transition.
/// \p UseSplat defines whether or not \p Val should be replicated
/// across the whole vector.
@@ -5686,7 +5981,7 @@ class VectorPromoteHelper {
return ConstantVector::get(ConstVec);
}
- /// \brief Check if promoting to a vector type an operand at \p OperandIdx
+ /// Check if promoting to a vector type an operand at \p OperandIdx
/// in \p Use can trigger undefined behavior.
static bool canCauseUndefinedBehavior(const Instruction *Use,
unsigned OperandIdx) {
@@ -5718,13 +6013,13 @@ public:
assert(Transition && "Do not know how to promote null");
}
- /// \brief Check if we can promote \p ToBePromoted to \p Type.
+ /// Check if we can promote \p ToBePromoted to \p Type.
bool canPromote(const Instruction *ToBePromoted) const {
// We could support CastInst too.
return isa<BinaryOperator>(ToBePromoted);
}
- /// \brief Check if it is profitable to promote \p ToBePromoted
+ /// Check if it is profitable to promote \p ToBePromoted
/// by moving downward the transition through.
bool shouldPromote(const Instruction *ToBePromoted) const {
// Promote only if all the operands can be statically expanded.
@@ -5752,23 +6047,23 @@ public:
ISDOpcode, TLI.getValueType(DL, getTransitionType(), true));
}
- /// \brief Check whether or not \p Use can be combined
+ /// Check whether or not \p Use can be combined
/// with the transition.
/// I.e., is it possible to do Use(Transition) => AnotherUse?
bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); }
- /// \brief Record \p ToBePromoted as part of the chain to be promoted.
+ /// Record \p ToBePromoted as part of the chain to be promoted.
void enqueueForPromotion(Instruction *ToBePromoted) {
InstsToBePromoted.push_back(ToBePromoted);
}
- /// \brief Set the instruction that will be combined with the transition.
+ /// Set the instruction that will be combined with the transition.
void recordCombineInstruction(Instruction *ToBeCombined) {
assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
CombineInst = ToBeCombined;
}
- /// \brief Promote all the instructions enqueued for promotion if it is
+ /// Promote all the instructions enqueued for promotion if it is
/// is profitable.
/// \return True if the promotion happened, false otherwise.
bool promote() {
@@ -5852,35 +6147,36 @@ bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
// => we would need to check that we are moving it at a cheaper place and
// we do not do that for now.
BasicBlock *Parent = Inst->getParent();
- DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
+ LLVM_DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost);
// If the transition has more than one use, assume this is not going to be
// beneficial.
while (Inst->hasOneUse()) {
Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
- DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
+ LLVM_DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
if (ToBePromoted->getParent() != Parent) {
- DEBUG(dbgs() << "Instruction to promote is in a different block ("
- << ToBePromoted->getParent()->getName()
- << ") than the transition (" << Parent->getName() << ").\n");
+ LLVM_DEBUG(dbgs() << "Instruction to promote is in a different block ("
+ << ToBePromoted->getParent()->getName()
+ << ") than the transition (" << Parent->getName()
+ << ").\n");
return false;
}
if (VPH.canCombine(ToBePromoted)) {
- DEBUG(dbgs() << "Assume " << *Inst << '\n'
- << "will be combined with: " << *ToBePromoted << '\n');
+ LLVM_DEBUG(dbgs() << "Assume " << *Inst << '\n'
+ << "will be combined with: " << *ToBePromoted << '\n');
VPH.recordCombineInstruction(ToBePromoted);
bool Changed = VPH.promote();
NumStoreExtractExposed += Changed;
return Changed;
}
- DEBUG(dbgs() << "Try promoting.\n");
+ LLVM_DEBUG(dbgs() << "Try promoting.\n");
if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
return false;
- DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
+ LLVM_DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
VPH.enqueueForPromotion(ToBePromoted);
Inst = ToBePromoted;
@@ -5890,7 +6186,7 @@ bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
/// For the instruction sequence of store below, F and I values
/// are bundled together as an i64 value before being stored into memory.
-/// Sometimes it is more efficent to generate separate stores for F and I,
+/// Sometimes it is more efficient to generate separate stores for F and I,
/// which can remove the bitwise instructions or sink them to colder places.
///
/// (store (or (zext (bitcast F to i32) to i64),
@@ -5978,12 +6274,13 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
if (HBC && HBC->getParent() != SI.getParent())
HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType());
+ bool IsLE = SI.getModule()->getDataLayout().isLittleEndian();
auto CreateSplitStore = [&](Value *V, bool Upper) {
V = Builder.CreateZExtOrBitCast(V, SplitStoreType);
Value *Addr = Builder.CreateBitCast(
SI.getOperand(1),
SplitStoreType->getPointerTo(SI.getPointerAddressSpace()));
- if (Upper)
+ if ((IsLE && Upper) || (!IsLE && !Upper))
Addr = Builder.CreateGEP(
SplitStoreType, Addr,
ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1));
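The IsLE check above is needed because the split stores address the two halves by memory offset, not by significance: only on a little-endian target does the logical upper half live at the higher address, so only there does it take the +1 GEP (on big-endian it is the lower half that does). A standalone illustration of that layout difference (plain C++, not LLVM code):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  uint64_t V = 0x1122334455667788ULL; // upper 32 bits are 0x11223344
  uint32_t Slot[2];
  std::memcpy(Slot, &V, sizeof(V));   // view the i64 store as two i32 slots
  // Little-endian: Slot[1] == 0x11223344 (upper half at the higher address).
  // Big-endian:    Slot[0] == 0x11223344 (upper half at the lower address).
  std::printf("slot0=%08x slot1=%08x\n", (unsigned)Slot[0], (unsigned)Slot[1]);
  return 0;
}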
@@ -6270,6 +6567,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
/// The GEP operand must be a pointer, so must its result -> BitCast
Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
GEPI->getName(), GEPI);
+ NC->setDebugLoc(GEPI->getDebugLoc());
GEPI->replaceAllUsesWith(NC);
GEPI->eraseFromParent();
++NumGEPsElim;
@@ -6374,7 +6672,8 @@ bool CodeGenPrepare::placeDbgValues(Function &F) {
// after it.
if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
continue;
- DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
+ LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"
+ << *DVI << ' ' << *VI);
DVI->removeFromParent();
if (isa<PHINode>(VI))
DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt());
@@ -6388,7 +6687,7 @@ bool CodeGenPrepare::placeDbgValues(Function &F) {
return MadeChange;
}
-/// \brief Scale down both weights to fit into uint32_t.
+/// Scale down both weights to fit into uint32_t.
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
uint32_t Scale = (NewMax / std::numeric_limits<uint32_t>::max()) + 1;
@@ -6396,7 +6695,7 @@ static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
NewFalse = NewFalse / Scale;
}
-/// \brief Some targets prefer to split a conditional branch like:
+/// Some targets prefer to split a conditional branch like:
/// \code
/// %0 = icmp ne i32 %a, 0
/// %1 = icmp ne i32 %b, 0
@@ -6453,7 +6752,7 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
!match(Cond2, m_CombineOr(m_Cmp(), m_BinOp())) )
continue;
- DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
+ LLVM_DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
// Create a new BB.
auto TmpBB =
@@ -6465,8 +6764,8 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
Br1->setCondition(Cond1);
LogicOp->eraseFromParent();
- // Depending on the conditon we have to either replace the true or the false
- // successor of the original branch instruction.
+ // Depending on the condition we have to either replace the true or the
+ // false successor of the original branch instruction.
if (Opc == Instruction::And)
Br1->setSuccessor(0, TmpBB);
else
@@ -6519,8 +6818,8 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
// We have flexibility in setting Prob for BB1 and Prob for NewBB.
// The requirement is that
// TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
- // = TrueProb for orignal BB.
- // Assuming the orignal weights are A and B, one choice is to set BB1's
+ // = TrueProb for original BB.
+ // Assuming the original weights are A and B, one choice is to set BB1's
// weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
// assumes that
// TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
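A quick numeric check of that choice (not part of the patch): with original weights A = 3 and B = 1, TrueProb for the original BB is 3/4; BB1 gets weights 3 and 5 (TrueProb 3/8) and TmpBB gets weights 3 and 2 (TrueProb 3/5), so TrueProb for BB1 + FalseProb for BB1 * TrueProb for TmpBB = 3/8 + (5/8)*(3/5) = 3/4 as required, and FalseProb for BB1 * TrueProb for TmpBB = 3/8 indeed equals TrueProb for BB1.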
@@ -6554,8 +6853,8 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
// We have flexibility in setting Prob for BB1 and Prob for TmpBB.
// The requirement is that
// FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
- // = FalseProb for orignal BB.
- // Assuming the orignal weights are A and B, one choice is to set BB1's
+ // = FalseProb for original BB.
+ // Assuming the original weights are A and B, one choice is to set BB1's
// weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
// assumes that
// FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
@@ -6581,8 +6880,8 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
MadeChange = true;
- DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
- TmpBB->dump());
+ LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
+ TmpBB->dump());
}
return MadeChange;
}
diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 98e22b24d37a..840e5ede6444 100644
--- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -113,7 +113,7 @@ void CriticalAntiDepBreaker::Observe(MachineInstr &MI, unsigned Count,
// FIXME: It may be possible to remove the isKill() restriction once PR18663
// has been properly fixed. There can be value in processing kills as seen in
// the AggressiveAntiDepBreaker class.
- if (MI.isDebugValue() || MI.isKill())
+ if (MI.isDebugInstr() || MI.isKill())
return;
assert(Count < InsertPosIndex && "Instruction index out of expected range!");
@@ -170,11 +170,11 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) {
// FIXME: The issue with predicated instruction is more complex. We are being
// conservative here because the kill markers cannot be trusted after
// if-conversion:
- // %r6 = LDR %sp, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
+ // %r6 = LDR %sp, %reg0, 92, 14, %reg0; mem:LD4[FixedStack14]
// ...
- // STR %r0, killed %r6, %reg0, 0, pred:0, pred:%cpsr; mem:ST4[%395]
- // %r6 = LDR %sp, %reg0, 100, pred:0, pred:%cpsr; mem:LD4[FixedStack12]
- // STR %r0, killed %r6, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
+ // STR %r0, killed %r6, %reg0, 0, 0, %cpsr; mem:ST4[%395]
+ // %r6 = LDR %sp, %reg0, 100, 0, %cpsr; mem:LD4[FixedStack12]
+ // STR %r0, killed %r6, %reg0, 0, 14, %reg0; mem:ST4[%396](align=8)
//
// The first R6 kill is not really a kill since it's killed by a predicated
// instruction which may not be executed. The second R6 def may or may not
@@ -461,14 +461,14 @@ BreakAntiDependencies(const std::vector<SUnit> &SUnits,
#ifndef NDEBUG
{
- DEBUG(dbgs() << "Critical path has total latency "
- << (Max->getDepth() + Max->Latency) << "\n");
- DEBUG(dbgs() << "Available regs:");
+ LLVM_DEBUG(dbgs() << "Critical path has total latency "
+ << (Max->getDepth() + Max->Latency) << "\n");
+ LLVM_DEBUG(dbgs() << "Available regs:");
for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
if (KillIndices[Reg] == ~0u)
- DEBUG(dbgs() << " " << printReg(Reg, TRI));
+ LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI));
}
- DEBUG(dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << '\n');
}
#endif
@@ -534,7 +534,7 @@ BreakAntiDependencies(const std::vector<SUnit> &SUnits,
// FIXME: It may be possible to remove the isKill() restriction once PR18663
// has been properly fixed. There can be value in processing kills as seen
// in the AggressiveAntiDepBreaker class.
- if (MI.isDebugValue() || MI.isKill())
+ if (MI.isDebugInstr() || MI.isKill())
continue;
// Check if this instruction has a dependence on the critical path that
@@ -645,10 +645,10 @@ BreakAntiDependencies(const std::vector<SUnit> &SUnits,
AntiDepReg,
LastNewReg[AntiDepReg],
RC, ForbidRegs)) {
- DEBUG(dbgs() << "Breaking anti-dependence edge on "
- << printReg(AntiDepReg, TRI) << " with "
- << RegRefs.count(AntiDepReg) << " references"
- << " using " << printReg(NewReg, TRI) << "!\n");
+ LLVM_DEBUG(dbgs() << "Breaking anti-dependence edge on "
+ << printReg(AntiDepReg, TRI) << " with "
+ << RegRefs.count(AntiDepReg) << " references"
+ << " using " << printReg(NewReg, TRI) << "!\n");
// Update the references to the old register to refer to the new
// register.
diff --git a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
index 848db444270d..cd302e78cc3e 100644
--- a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
+++ b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
@@ -222,7 +222,7 @@ VLIWPacketizerList::~VLIWPacketizerList() {
// End the current packet, bundle packet instructions and reset DFA state.
void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
MachineBasicBlock::iterator MI) {
- DEBUG({
+ LLVM_DEBUG({
if (!CurrentPacketMIs.empty()) {
dbgs() << "Finalizing packet:\n";
for (MachineInstr *MI : CurrentPacketMIs)
@@ -235,7 +235,7 @@ void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
}
CurrentPacketMIs.clear();
ResourceTracker->clearResources();
- DEBUG(dbgs() << "End packet\n");
+ LLVM_DEBUG(dbgs() << "End packet\n");
}
// Bundle machine instructions into packets.
@@ -248,7 +248,7 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
std::distance(BeginItr, EndItr));
VLIWScheduler->schedule();
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "Scheduling DAG of the packetize region\n";
for (SUnit &SU : VLIWScheduler->SUnits)
SU.dumpAll(VLIWScheduler);
@@ -287,10 +287,10 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
assert(SUI && "Missing SUnit Info!");
// Ask DFA if machine resource is available for MI.
- DEBUG(dbgs() << "Checking resources for adding MI to packet " << MI);
+ LLVM_DEBUG(dbgs() << "Checking resources for adding MI to packet " << MI);
bool ResourceAvail = ResourceTracker->canReserveResources(MI);
- DEBUG({
+ LLVM_DEBUG({
if (ResourceAvail)
dbgs() << " Resources are available for adding MI to packet\n";
else
@@ -302,31 +302,33 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
SUnit *SUJ = MIToSUnit[MJ];
assert(SUJ && "Missing SUnit Info!");
- DEBUG(dbgs() << " Checking against MJ " << *MJ);
+ LLVM_DEBUG(dbgs() << " Checking against MJ " << *MJ);
// Is it legal to packetize SUI and SUJ together.
if (!isLegalToPacketizeTogether(SUI, SUJ)) {
- DEBUG(dbgs() << " Not legal to add MI, try to prune\n");
+ LLVM_DEBUG(dbgs() << " Not legal to add MI, try to prune\n");
// Allow packetization if dependency can be pruned.
if (!isLegalToPruneDependencies(SUI, SUJ)) {
// End the packet if dependency cannot be pruned.
- DEBUG(dbgs() << " Could not prune dependencies for adding MI\n");
+ LLVM_DEBUG(dbgs()
+ << " Could not prune dependencies for adding MI\n");
endPacket(MBB, MI);
break;
}
- DEBUG(dbgs() << " Pruned dependence for adding MI\n");
+ LLVM_DEBUG(dbgs() << " Pruned dependence for adding MI\n");
}
}
} else {
- DEBUG(if (ResourceAvail)
- dbgs() << "Resources are available, but instruction should not be "
- "added to packet\n " << MI);
+ LLVM_DEBUG(if (ResourceAvail) dbgs()
+ << "Resources are available, but instruction should not be "
+ "added to packet\n "
+ << MI);
// End the packet if resource is not available, or if the instruction
// should not be added to the current packet.
endPacket(MBB, MI);
}
// Add MI to the current packet.
- DEBUG(dbgs() << "* Adding MI to packet " << MI << '\n');
+ LLVM_DEBUG(dbgs() << "* Adding MI to packet " << MI << '\n');
BeginItr = addToPacket(MI);
} // For all instructions in the packetization range.
diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index e6a54bb300f2..ff44c5660bad 100644
--- a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -125,7 +125,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
// If the instruction is dead, delete it!
if (isDead(MI)) {
- DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI);
+ LLVM_DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI);
// It is possible that some DBG_VALUE instructions refer to this
// instruction. They get marked as undef and will be deleted
// in the live debug variable analysis.
diff --git a/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp b/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp
index 7d7eb57352a2..c83db476a4de 100644
--- a/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp
+++ b/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp
@@ -439,7 +439,7 @@ LaneBitmask DetectDeadLanes::determineInitialUsedLanes(unsigned Reg) {
const TargetRegisterClass *DstRC = MRI->getRegClass(DefReg);
CrossCopy = isCrossCopy(*MRI, UseMI, DstRC, MO);
if (CrossCopy)
- DEBUG(dbgs() << "Copy across incompatible classes: " << UseMI);
+ LLVM_DEBUG(dbgs() << "Copy across incompatible classes: " << UseMI);
}
if (!CrossCopy)
@@ -520,17 +520,15 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) {
transferDefinedLanesStep(MO, Info.DefinedLanes);
}
- DEBUG(
- dbgs() << "Defined/Used lanes:\n";
- for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx);
- const VRegInfo &Info = VRegInfos[RegIdx];
- dbgs() << printReg(Reg, nullptr)
- << " Used: " << PrintLaneMask(Info.UsedLanes)
- << " Def: " << PrintLaneMask(Info.DefinedLanes) << '\n';
- }
- dbgs() << "\n";
- );
+ LLVM_DEBUG(dbgs() << "Defined/Used lanes:\n"; for (unsigned RegIdx = 0;
+ RegIdx < NumVirtRegs;
+ ++RegIdx) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx);
+ const VRegInfo &Info = VRegInfos[RegIdx];
+ dbgs() << printReg(Reg, nullptr)
+ << " Used: " << PrintLaneMask(Info.UsedLanes)
+ << " Def: " << PrintLaneMask(Info.DefinedLanes) << '\n';
+ } dbgs() << "\n";);
bool Again = false;
// Mark operands as dead/unused.
@@ -545,18 +543,19 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) {
unsigned RegIdx = TargetRegisterInfo::virtReg2Index(Reg);
const VRegInfo &RegInfo = VRegInfos[RegIdx];
if (MO.isDef() && !MO.isDead() && RegInfo.UsedLanes.none()) {
- DEBUG(dbgs() << "Marking operand '" << MO << "' as dead in " << MI);
+ LLVM_DEBUG(dbgs()
+ << "Marking operand '" << MO << "' as dead in " << MI);
MO.setIsDead();
}
if (MO.readsReg()) {
bool CrossCopy = false;
if (isUndefRegAtInput(MO, RegInfo)) {
- DEBUG(dbgs() << "Marking operand '" << MO << "' as undef in "
- << MI);
+ LLVM_DEBUG(dbgs()
+ << "Marking operand '" << MO << "' as undef in " << MI);
MO.setIsUndef();
} else if (isUndefInput(MO, &CrossCopy)) {
- DEBUG(dbgs() << "Marking operand '" << MO << "' as undef in "
- << MI);
+ LLVM_DEBUG(dbgs()
+ << "Marking operand '" << MO << "' as undef in " << MI);
MO.setIsUndef();
if (CrossCopy)
Again = true;
@@ -577,7 +576,7 @@ bool DetectDeadLanes::runOnMachineFunction(MachineFunction &MF) {
 // so we save compile time.
MRI = &MF.getRegInfo();
if (!MRI->subRegLivenessEnabled()) {
- DEBUG(dbgs() << "Skipping Detect dead lanes pass\n");
+ LLVM_DEBUG(dbgs() << "Skipping Detect dead lanes pass\n");
return false;
}
diff --git a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
index 39d80c0bf9bd..4586649d17f0 100644
--- a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
@@ -18,6 +18,7 @@
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -33,7 +34,6 @@
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Transforms/Utils/Local.h"
#include <cstddef>
using namespace llvm;
@@ -195,9 +195,9 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
if (Resumes.empty())
return false;
- // Check the personality, don't do anything if it's funclet-based.
+ // Check the personality, don't do anything if it's scope-based.
EHPersonality Pers = classifyEHPersonality(Fn.getPersonalityFn());
- if (isFuncletEHPersonality(Pers))
+ if (isScopedEHPersonality(Pers))
return false;
LLVMContext &Ctx = Fn.getContext();
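For the personality check above, a rough sketch of the guard as it reads after the change, using only the EHPersonalities.h helpers visible in the hunk; the helper name is invented, and the no-personality case is an assumption. Scope-based personalities (a superset of the old funclet-based set) keep their own resume lowering, so the _Unwind_Resume rewrite is skipped for them.

    #include "llvm/Analysis/EHPersonalities.h"
    #include "llvm/IR/Function.h"

    // Sketch only; the pass itself inlines this check.
    static bool skipResumeRewrite(const llvm::Function &Fn) {
      if (!Fn.hasPersonalityFn())
        return false; // assumption: no personality means the rewrite may proceed
      llvm::EHPersonality Pers =
          llvm::classifyEHPersonality(Fn.getPersonalityFn());
      return llvm::isScopedEHPersonality(Pers);
    }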
diff --git a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
index 6294ff450113..098afd885f2f 100644
--- a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -185,7 +185,7 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {
// Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to
// get right.
if (!MBB->livein_empty()) {
- DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n");
+ LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n");
return false;
}
@@ -195,18 +195,18 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {
// terminators never have side effects or define any used register values.
for (MachineBasicBlock::iterator I = MBB->begin(),
E = MBB->getFirstTerminator(); I != E; ++I) {
- if (I->isDebugValue())
+ if (I->isDebugInstr())
continue;
if (++InstrCount > BlockInstrLimit && !Stress) {
- DEBUG(dbgs() << printMBBReference(*MBB) << " has more than "
- << BlockInstrLimit << " instructions.\n");
+ LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than "
+ << BlockInstrLimit << " instructions.\n");
return false;
}
// There shouldn't normally be any phis in a single-predecessor block.
if (I->isPHI()) {
- DEBUG(dbgs() << "Can't hoist: " << *I);
+ LLVM_DEBUG(dbgs() << "Can't hoist: " << *I);
return false;
}
@@ -214,21 +214,21 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {
// speculate GOT or constant pool loads that are guaranteed not to trap,
// but we don't support that for now.
if (I->mayLoad()) {
- DEBUG(dbgs() << "Won't speculate load: " << *I);
+ LLVM_DEBUG(dbgs() << "Won't speculate load: " << *I);
return false;
}
// We never speculate stores, so an AA pointer isn't necessary.
bool DontMoveAcrossStore = true;
if (!I->isSafeToMove(nullptr, DontMoveAcrossStore)) {
- DEBUG(dbgs() << "Can't speculate: " << *I);
+ LLVM_DEBUG(dbgs() << "Can't speculate: " << *I);
return false;
}
// Check for any dependencies on Head instructions.
for (const MachineOperand &MO : I->operands()) {
if (MO.isRegMask()) {
- DEBUG(dbgs() << "Won't speculate regmask: " << *I);
+ LLVM_DEBUG(dbgs() << "Won't speculate regmask: " << *I);
return false;
}
if (!MO.isReg())
@@ -246,9 +246,10 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {
if (!DefMI || DefMI->getParent() != Head)
continue;
if (InsertAfter.insert(DefMI).second)
- DEBUG(dbgs() << printMBBReference(*MBB) << " depends on " << *DefMI);
+ LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " depends on "
+ << *DefMI);
if (DefMI->isTerminator()) {
- DEBUG(dbgs() << "Can't insert instructions below terminator.\n");
+ LLVM_DEBUG(dbgs() << "Can't insert instructions below terminator.\n");
return false;
}
}
@@ -279,7 +280,7 @@ bool SSAIfConv::findInsertionPoint() {
--I;
    // Some of the conditional code depends on I.
if (InsertAfter.count(&*I)) {
- DEBUG(dbgs() << "Can't insert code after " << *I);
+ LLVM_DEBUG(dbgs() << "Can't insert code after " << *I);
return false;
}
@@ -313,7 +314,7 @@ bool SSAIfConv::findInsertionPoint() {
// Some of the clobbered registers are live before I, not a valid insertion
// point.
if (!LiveRegUnits.empty()) {
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "Would clobber";
for (SparseSet<unsigned>::const_iterator
i = LiveRegUnits.begin(), e = LiveRegUnits.end(); i != e; ++i)
@@ -325,10 +326,10 @@ bool SSAIfConv::findInsertionPoint() {
// This is a valid insertion point.
InsertionPoint = I;
- DEBUG(dbgs() << "Can insert before " << *I);
+ LLVM_DEBUG(dbgs() << "Can insert before " << *I);
return true;
}
- DEBUG(dbgs() << "No legal insertion point found.\n");
+ LLVM_DEBUG(dbgs() << "No legal insertion point found.\n");
return false;
}
@@ -361,39 +362,39 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) {
if (Succ1->pred_size() != 1 || Succ1->succ_size() != 1 ||
Succ1->succ_begin()[0] != Tail)
return false;
- DEBUG(dbgs() << "\nDiamond: " << printMBBReference(*Head) << " -> "
- << printMBBReference(*Succ0) << "/"
- << printMBBReference(*Succ1) << " -> "
- << printMBBReference(*Tail) << '\n');
+ LLVM_DEBUG(dbgs() << "\nDiamond: " << printMBBReference(*Head) << " -> "
+ << printMBBReference(*Succ0) << "/"
+ << printMBBReference(*Succ1) << " -> "
+ << printMBBReference(*Tail) << '\n');
// Live-in physregs are tricky to get right when speculating code.
if (!Tail->livein_empty()) {
- DEBUG(dbgs() << "Tail has live-ins.\n");
+ LLVM_DEBUG(dbgs() << "Tail has live-ins.\n");
return false;
}
} else {
- DEBUG(dbgs() << "\nTriangle: " << printMBBReference(*Head) << " -> "
- << printMBBReference(*Succ0) << " -> "
- << printMBBReference(*Tail) << '\n');
+ LLVM_DEBUG(dbgs() << "\nTriangle: " << printMBBReference(*Head) << " -> "
+ << printMBBReference(*Succ0) << " -> "
+ << printMBBReference(*Tail) << '\n');
}
// This is a triangle or a diamond.
// If Tail doesn't have any phis, there must be side effects.
if (Tail->empty() || !Tail->front().isPHI()) {
- DEBUG(dbgs() << "No phis in tail.\n");
+ LLVM_DEBUG(dbgs() << "No phis in tail.\n");
return false;
}
// The branch we're looking to eliminate must be analyzable.
Cond.clear();
if (TII->analyzeBranch(*Head, TBB, FBB, Cond)) {
- DEBUG(dbgs() << "Branch not analyzable.\n");
+ LLVM_DEBUG(dbgs() << "Branch not analyzable.\n");
return false;
}
// This is weird, probably some sort of degenerate CFG.
if (!TBB) {
- DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch.\n");
+ LLVM_DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch.\n");
return false;
}
@@ -422,7 +423,7 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) {
// Get target information.
if (!TII->canInsertSelect(*Head, Cond, PI.TReg, PI.FReg,
PI.CondCycles, PI.TCycles, PI.FCycles)) {
- DEBUG(dbgs() << "Can't convert: " << *PI.PHI);
+ LLVM_DEBUG(dbgs() << "Can't convert: " << *PI.PHI);
return false;
}
}
@@ -459,10 +460,10 @@ void SSAIfConv::replacePHIInstrs() {
// Convert all PHIs to select instructions inserted before FirstTerm.
for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
PHIInfo &PI = PHIs[i];
- DEBUG(dbgs() << "If-converting " << *PI.PHI);
+ LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI);
unsigned DstReg = PI.PHI->getOperand(0).getReg();
TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg);
- DEBUG(dbgs() << " --> " << *std::prev(FirstTerm));
+ LLVM_DEBUG(dbgs() << " --> " << *std::prev(FirstTerm));
PI.PHI->eraseFromParent();
PI.PHI = nullptr;
}
@@ -481,7 +482,7 @@ void SSAIfConv::rewritePHIOperands() {
PHIInfo &PI = PHIs[i];
unsigned DstReg = 0;
- DEBUG(dbgs() << "If-converting " << *PI.PHI);
+ LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI);
if (PI.TReg == PI.FReg) {
// We do not need the select instruction if both incoming values are
// equal.
@@ -491,7 +492,7 @@ void SSAIfConv::rewritePHIOperands() {
DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst));
TII->insertSelect(*Head, FirstTerm, HeadDL,
DstReg, Cond, PI.TReg, PI.FReg);
- DEBUG(dbgs() << " --> " << *std::prev(FirstTerm));
+ LLVM_DEBUG(dbgs() << " --> " << *std::prev(FirstTerm));
}
// Rewrite PHI operands TPred -> (DstReg, Head), remove FPred.
@@ -505,7 +506,7 @@ void SSAIfConv::rewritePHIOperands() {
PI.PHI->RemoveOperand(i-2);
}
}
- DEBUG(dbgs() << " --> " << *PI.PHI);
+ LLVM_DEBUG(dbgs() << " --> " << *PI.PHI);
}
}
@@ -563,8 +564,8 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {
assert(Head->succ_empty() && "Additional head successors?");
if (!ExtraPreds && Head->isLayoutSuccessor(Tail)) {
// Splice Tail onto the end of Head.
- DEBUG(dbgs() << "Joining tail " << printMBBReference(*Tail) << " into head "
- << printMBBReference(*Head) << '\n');
+ LLVM_DEBUG(dbgs() << "Joining tail " << printMBBReference(*Tail)
+ << " into head " << printMBBReference(*Head) << '\n');
Head->splice(Head->end(), Tail,
Tail->begin(), Tail->end());
Head->transferSuccessorsAndUpdatePHIs(Tail);
@@ -572,12 +573,12 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {
Tail->eraseFromParent();
} else {
// We need a branch to Tail, let code placement work it out later.
- DEBUG(dbgs() << "Converting to unconditional branch.\n");
+ LLVM_DEBUG(dbgs() << "Converting to unconditional branch.\n");
SmallVector<MachineOperand, 0> EmptyCond;
TII->insertBranch(*Head, Tail, nullptr, EmptyCond, HeadDL);
Head->addSuccessor(Tail);
}
- DEBUG(dbgs() << *Head);
+ LLVM_DEBUG(dbgs() << *Head);
}
@@ -692,7 +693,7 @@ bool EarlyIfConverter::shouldConvertIf() {
MachineTraceMetrics::Trace TBBTrace = MinInstr->getTrace(IfConv.getTPred());
MachineTraceMetrics::Trace FBBTrace = MinInstr->getTrace(IfConv.getFPred());
- DEBUG(dbgs() << "TBB: " << TBBTrace << "FBB: " << FBBTrace);
+ LLVM_DEBUG(dbgs() << "TBB: " << TBBTrace << "FBB: " << FBBTrace);
unsigned MinCrit = std::min(TBBTrace.getCriticalPath(),
FBBTrace.getCriticalPath());
@@ -706,10 +707,10 @@ bool EarlyIfConverter::shouldConvertIf() {
if (IfConv.TBB != IfConv.Tail)
ExtraBlocks.push_back(IfConv.TBB);
unsigned ResLength = FBBTrace.getResourceLength(ExtraBlocks);
- DEBUG(dbgs() << "Resource length " << ResLength
- << ", minimal critical path " << MinCrit << '\n');
+ LLVM_DEBUG(dbgs() << "Resource length " << ResLength
+ << ", minimal critical path " << MinCrit << '\n');
if (ResLength > MinCrit + CritLimit) {
- DEBUG(dbgs() << "Not enough available ILP.\n");
+ LLVM_DEBUG(dbgs() << "Not enough available ILP.\n");
return false;
}
@@ -719,7 +720,7 @@ bool EarlyIfConverter::shouldConvertIf() {
MachineTraceMetrics::Trace HeadTrace = MinInstr->getTrace(IfConv.Head);
unsigned BranchDepth =
HeadTrace.getInstrCycles(*IfConv.Head->getFirstTerminator()).Depth;
- DEBUG(dbgs() << "Branch depth: " << BranchDepth << '\n');
+ LLVM_DEBUG(dbgs() << "Branch depth: " << BranchDepth << '\n');
// Look at all the tail phis, and compute the critical path extension caused
// by inserting select instructions.
@@ -728,15 +729,15 @@ bool EarlyIfConverter::shouldConvertIf() {
SSAIfConv::PHIInfo &PI = IfConv.PHIs[i];
unsigned Slack = TailTrace.getInstrSlack(*PI.PHI);
unsigned MaxDepth = Slack + TailTrace.getInstrCycles(*PI.PHI).Depth;
- DEBUG(dbgs() << "Slack " << Slack << ":\t" << *PI.PHI);
+ LLVM_DEBUG(dbgs() << "Slack " << Slack << ":\t" << *PI.PHI);
// The condition is pulled into the critical path.
unsigned CondDepth = adjCycles(BranchDepth, PI.CondCycles);
if (CondDepth > MaxDepth) {
unsigned Extra = CondDepth - MaxDepth;
- DEBUG(dbgs() << "Condition adds " << Extra << " cycles.\n");
+ LLVM_DEBUG(dbgs() << "Condition adds " << Extra << " cycles.\n");
if (Extra > CritLimit) {
- DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
+ LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
return false;
}
}
@@ -745,9 +746,9 @@ bool EarlyIfConverter::shouldConvertIf() {
unsigned TDepth = adjCycles(TBBTrace.getPHIDepth(*PI.PHI), PI.TCycles);
if (TDepth > MaxDepth) {
unsigned Extra = TDepth - MaxDepth;
- DEBUG(dbgs() << "TBB data adds " << Extra << " cycles.\n");
+ LLVM_DEBUG(dbgs() << "TBB data adds " << Extra << " cycles.\n");
if (Extra > CritLimit) {
- DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
+ LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
return false;
}
}
@@ -756,9 +757,9 @@ bool EarlyIfConverter::shouldConvertIf() {
unsigned FDepth = adjCycles(FBBTrace.getPHIDepth(*PI.PHI), PI.FCycles);
if (FDepth > MaxDepth) {
unsigned Extra = FDepth - MaxDepth;
- DEBUG(dbgs() << "FBB data adds " << Extra << " cycles.\n");
+ LLVM_DEBUG(dbgs() << "FBB data adds " << Extra << " cycles.\n");
if (Extra > CritLimit) {
- DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
+ LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
return false;
}
}
@@ -783,8 +784,8 @@ bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) {
}
bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
- DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n"
- << "********** Function: " << MF.getName() << '\n');
+ LLVM_DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n"
+ << "********** Function: " << MF.getName() << '\n');
if (skipFunction(MF.getFunction()))
return false;
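Besides the logging rename, this file picks up the isDebugValue() to isDebugInstr() rename. A small sketch of the pattern, assuming the MachineInstr API shown in the hunk (the helper itself is hypothetical): DBG_VALUE and the other debug pseudo-instructions never count toward the BlockInstrLimit that gates speculation.

    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineInstr.h"

    // Hypothetical helper, not part of the pass.
    static unsigned countSpeculationCandidates(const llvm::MachineBasicBlock &MBB) {
      unsigned Count = 0;
      for (const llvm::MachineInstr &MI : MBB) {
        if (MI.isDebugInstr()) // skips DBG_VALUE and friends, like the hunk above
          continue;
        if (MI.isTerminator())
          break; // the pass only scans up to the first terminator
        ++Count;
      }
      return Count;
    }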
diff --git a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
deleted file mode 100644
index 61ec3f4be1dc..000000000000
--- a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
+++ /dev/null
@@ -1,755 +0,0 @@
-//===- ExecutionDepsFix.cpp - Fix execution dependency issues ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/ExecutionDepsFix.h"
-
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/iterator_range.h"
-#include "llvm/CodeGen/LivePhysRegs.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterClassInfo.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/Support/Allocator.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "execution-deps-fix"
-
-/// Translate TRI register number to a list of indices into our smaller tables
-/// of interesting registers.
-iterator_range<SmallVectorImpl<int>::const_iterator>
-ExecutionDepsFix::regIndices(unsigned Reg) const {
- assert(Reg < AliasMap.size() && "Invalid register");
- const auto &Entry = AliasMap[Reg];
- return make_range(Entry.begin(), Entry.end());
-}
-
-DomainValue *ExecutionDepsFix::alloc(int domain) {
- DomainValue *dv = Avail.empty() ?
- new(Allocator.Allocate()) DomainValue :
- Avail.pop_back_val();
- if (domain >= 0)
- dv->addDomain(domain);
- assert(dv->Refs == 0 && "Reference count wasn't cleared");
- assert(!dv->Next && "Chained DomainValue shouldn't have been recycled");
- return dv;
-}
-
-/// Release a reference to DV. When the last reference is released,
-/// collapse if needed.
-void ExecutionDepsFix::release(DomainValue *DV) {
- while (DV) {
- assert(DV->Refs && "Bad DomainValue");
- if (--DV->Refs)
- return;
-
- // There are no more DV references. Collapse any contained instructions.
- if (DV->AvailableDomains && !DV->isCollapsed())
- collapse(DV, DV->getFirstDomain());
-
- DomainValue *Next = DV->Next;
- DV->clear();
- Avail.push_back(DV);
- // Also release the next DomainValue in the chain.
- DV = Next;
- }
-}
-
-/// Follow the chain of dead DomainValues until a live DomainValue is reached.
-/// Update the referenced pointer when necessary.
-DomainValue *ExecutionDepsFix::resolve(DomainValue *&DVRef) {
- DomainValue *DV = DVRef;
- if (!DV || !DV->Next)
- return DV;
-
- // DV has a chain. Find the end.
- do DV = DV->Next;
- while (DV->Next);
-
- // Update DVRef to point to DV.
- retain(DV);
- release(DVRef);
- DVRef = DV;
- return DV;
-}
-
-/// Set LiveRegs[rx] = dv, updating reference counts.
-void ExecutionDepsFix::setLiveReg(int rx, DomainValue *dv) {
- assert(unsigned(rx) < NumRegs && "Invalid index");
- assert(LiveRegs && "Must enter basic block first.");
-
- if (LiveRegs[rx].Value == dv)
- return;
- if (LiveRegs[rx].Value)
- release(LiveRegs[rx].Value);
- LiveRegs[rx].Value = retain(dv);
-}
-
-// Kill register rx, recycle or collapse any DomainValue.
-void ExecutionDepsFix::kill(int rx) {
- assert(unsigned(rx) < NumRegs && "Invalid index");
- assert(LiveRegs && "Must enter basic block first.");
- if (!LiveRegs[rx].Value)
- return;
-
- release(LiveRegs[rx].Value);
- LiveRegs[rx].Value = nullptr;
-}
-
-/// Force register rx into domain.
-void ExecutionDepsFix::force(int rx, unsigned domain) {
- assert(unsigned(rx) < NumRegs && "Invalid index");
- assert(LiveRegs && "Must enter basic block first.");
- if (DomainValue *dv = LiveRegs[rx].Value) {
- if (dv->isCollapsed())
- dv->addDomain(domain);
- else if (dv->hasDomain(domain))
- collapse(dv, domain);
- else {
- // This is an incompatible open DomainValue. Collapse it to whatever and
- // force the new value into domain. This costs a domain crossing.
- collapse(dv, dv->getFirstDomain());
- assert(LiveRegs[rx].Value && "Not live after collapse?");
- LiveRegs[rx].Value->addDomain(domain);
- }
- } else {
- // Set up basic collapsed DomainValue.
- setLiveReg(rx, alloc(domain));
- }
-}
-
-/// Collapse open DomainValue into given domain. If there are multiple
-/// registers using dv, they each get a unique collapsed DomainValue.
-void ExecutionDepsFix::collapse(DomainValue *dv, unsigned domain) {
- assert(dv->hasDomain(domain) && "Cannot collapse");
-
- // Collapse all the instructions.
- while (!dv->Instrs.empty())
- TII->setExecutionDomain(*dv->Instrs.pop_back_val(), domain);
- dv->setSingleDomain(domain);
-
- // If there are multiple users, give them new, unique DomainValues.
- if (LiveRegs && dv->Refs > 1)
- for (unsigned rx = 0; rx != NumRegs; ++rx)
- if (LiveRegs[rx].Value == dv)
- setLiveReg(rx, alloc(domain));
-}
-
-/// All instructions and registers in B are moved to A, and B is released.
-bool ExecutionDepsFix::merge(DomainValue *A, DomainValue *B) {
- assert(!A->isCollapsed() && "Cannot merge into collapsed");
- assert(!B->isCollapsed() && "Cannot merge from collapsed");
- if (A == B)
- return true;
- // Restrict to the domains that A and B have in common.
- unsigned common = A->getCommonDomains(B->AvailableDomains);
- if (!common)
- return false;
- A->AvailableDomains = common;
- A->Instrs.append(B->Instrs.begin(), B->Instrs.end());
-
- // Clear the old DomainValue so we won't try to swizzle instructions twice.
- B->clear();
- // All uses of B are referred to A.
- B->Next = retain(A);
-
- for (unsigned rx = 0; rx != NumRegs; ++rx) {
- assert(LiveRegs && "no space allocated for live registers");
- if (LiveRegs[rx].Value == B)
- setLiveReg(rx, A);
- }
- return true;
-}
-
-/// Set up LiveRegs by merging predecessor live-out values.
-void ExecutionDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
- // Reset instruction counter in each basic block.
- CurInstr = 0;
-
- // Set up UndefReads to track undefined register reads.
- UndefReads.clear();
- LiveRegSet.clear();
-
- // Set up LiveRegs to represent registers entering MBB.
- if (!LiveRegs)
- LiveRegs = new LiveReg[NumRegs];
-
- // Default values are 'nothing happened a long time ago'.
- for (unsigned rx = 0; rx != NumRegs; ++rx) {
- LiveRegs[rx].Value = nullptr;
- LiveRegs[rx].Def = -(1 << 20);
- }
-
- // This is the entry block.
- if (MBB->pred_empty()) {
- for (const auto &LI : MBB->liveins()) {
- for (int rx : regIndices(LI.PhysReg)) {
- // Treat function live-ins as if they were defined just before the first
- // instruction. Usually, function arguments are set up immediately
- // before the call.
- LiveRegs[rx].Def = -1;
- }
- }
- DEBUG(dbgs() << printMBBReference(*MBB) << ": entry\n");
- return;
- }
-
- // Try to coalesce live-out registers from predecessors.
- for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(),
- pe = MBB->pred_end(); pi != pe; ++pi) {
- auto fi = MBBInfos.find(*pi);
- assert(fi != MBBInfos.end() &&
- "Should have pre-allocated MBBInfos for all MBBs");
- LiveReg *Incoming = fi->second.OutRegs;
- // Incoming is null if this is a backedge from a BB
- // we haven't processed yet
- if (Incoming == nullptr) {
- continue;
- }
-
- for (unsigned rx = 0; rx != NumRegs; ++rx) {
- // Use the most recent predecessor def for each register.
- LiveRegs[rx].Def = std::max(LiveRegs[rx].Def, Incoming[rx].Def);
-
- DomainValue *pdv = resolve(Incoming[rx].Value);
- if (!pdv)
- continue;
- if (!LiveRegs[rx].Value) {
- setLiveReg(rx, pdv);
- continue;
- }
-
- // We have a live DomainValue from more than one predecessor.
- if (LiveRegs[rx].Value->isCollapsed()) {
- // We are already collapsed, but predecessor is not. Force it.
- unsigned Domain = LiveRegs[rx].Value->getFirstDomain();
- if (!pdv->isCollapsed() && pdv->hasDomain(Domain))
- collapse(pdv, Domain);
- continue;
- }
-
- // Currently open, merge in predecessor.
- if (!pdv->isCollapsed())
- merge(LiveRegs[rx].Value, pdv);
- else
- force(rx, pdv->getFirstDomain());
- }
- }
- DEBUG(
- dbgs() << printMBBReference(*MBB)
- << (!isBlockDone(MBB) ? ": incomplete\n" : ": all preds known\n"));
-}
-
-void ExecutionDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) {
- assert(LiveRegs && "Must enter basic block first.");
- LiveReg *OldOutRegs = MBBInfos[MBB].OutRegs;
- // Save register clearances at end of MBB - used by enterBasicBlock().
- MBBInfos[MBB].OutRegs = LiveRegs;
-
- // While processing the basic block, we kept `Def` relative to the start
- // of the basic block for convenience. However, future use of this information
- // only cares about the clearance from the end of the block, so adjust
- // everything to be relative to the end of the basic block.
- for (unsigned i = 0, e = NumRegs; i != e; ++i)
- LiveRegs[i].Def -= CurInstr;
- if (OldOutRegs) {
- // This must be the second pass.
- // Release all the DomainValues instead of keeping them.
- for (unsigned i = 0, e = NumRegs; i != e; ++i)
- release(OldOutRegs[i].Value);
- delete[] OldOutRegs;
- }
- LiveRegs = nullptr;
-}
-
-bool ExecutionDepsFix::visitInstr(MachineInstr *MI) {
- // Update instructions with explicit execution domains.
- std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(*MI);
- if (DomP.first) {
- if (DomP.second)
- visitSoftInstr(MI, DomP.second);
- else
- visitHardInstr(MI, DomP.first);
- }
-
- return !DomP.first;
-}
-
-/// \brief Helps avoid false dependencies on undef registers by updating the
-/// machine instructions' undef operand to use a register that the instruction
-/// is truly dependent on, or use a register with clearance higher than Pref.
-/// Returns true if it was able to find a true dependency, thus not requiring
-/// a dependency breaking instruction regardless of clearance.
-bool ExecutionDepsFix::pickBestRegisterForUndef(MachineInstr *MI,
- unsigned OpIdx, unsigned Pref) {
- MachineOperand &MO = MI->getOperand(OpIdx);
- assert(MO.isUndef() && "Expected undef machine operand");
-
- unsigned OriginalReg = MO.getReg();
-
- // Update only undef operands that are mapped to one register.
- if (AliasMap[OriginalReg].size() != 1)
- return false;
-
- // Get the undef operand's register class
- const TargetRegisterClass *OpRC =
- TII->getRegClass(MI->getDesc(), OpIdx, TRI, *MF);
-
- // If the instruction has a true dependency, we can hide the false dependency
- // behind it.
- for (MachineOperand &CurrMO : MI->operands()) {
- if (!CurrMO.isReg() || CurrMO.isDef() || CurrMO.isUndef() ||
- !OpRC->contains(CurrMO.getReg()))
- continue;
- // We found a true dependency - replace the undef register with the true
- // dependency.
- MO.setReg(CurrMO.getReg());
- return true;
- }
-
- // Go over all registers in the register class and find the register with
- // max clearance or clearance higher than Pref.
- unsigned MaxClearance = 0;
- unsigned MaxClearanceReg = OriginalReg;
- ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(OpRC);
- for (auto Reg : Order) {
- assert(AliasMap[Reg].size() == 1 &&
- "Reg is expected to be mapped to a single index");
- int RCrx = *regIndices(Reg).begin();
- unsigned Clearance = CurInstr - LiveRegs[RCrx].Def;
- if (Clearance <= MaxClearance)
- continue;
- MaxClearance = Clearance;
- MaxClearanceReg = Reg;
-
- if (MaxClearance > Pref)
- break;
- }
-
- // Update the operand if we found a register with better clearance.
- if (MaxClearanceReg != OriginalReg)
- MO.setReg(MaxClearanceReg);
-
- return false;
-}
-
-/// \brief Return true if it makes sense to break dependence on a partial def
-/// or undef use.
-bool ExecutionDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
- unsigned Pref) {
- unsigned reg = MI->getOperand(OpIdx).getReg();
- for (int rx : regIndices(reg)) {
- unsigned Clearance = CurInstr - LiveRegs[rx].Def;
- DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref);
-
- if (Pref > Clearance) {
- DEBUG(dbgs() << ": Break dependency.\n");
- continue;
- }
- DEBUG(dbgs() << ": OK .\n");
- return false;
- }
- return true;
-}
-
-// Update def-ages for registers defined by MI.
-// If Kill is set, also kill off DomainValues clobbered by the defs.
-//
-// Also break dependencies on partial defs and undef uses.
-void ExecutionDepsFix::processDefs(MachineInstr *MI, bool breakDependency,
- bool Kill) {
- assert(!MI->isDebugValue() && "Won't process debug values");
-
- // Break dependence on undef uses. Do this before updating LiveRegs below.
- unsigned OpNum;
- if (breakDependency) {
- unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI);
- if (Pref) {
- bool HadTrueDependency = pickBestRegisterForUndef(MI, OpNum, Pref);
- // We don't need to bother trying to break a dependency if this
- // instruction has a true dependency on that register through another
- // operand - we'll have to wait for it to be available regardless.
- if (!HadTrueDependency && shouldBreakDependence(MI, OpNum, Pref))
- UndefReads.push_back(std::make_pair(MI, OpNum));
- }
- }
- const MCInstrDesc &MCID = MI->getDesc();
- for (unsigned i = 0,
- e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();
- i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg())
- continue;
- if (MO.isUse())
- continue;
- for (int rx : regIndices(MO.getReg())) {
- // This instruction explicitly defines rx.
- DEBUG(dbgs() << printReg(RC->getRegister(rx), TRI) << ":\t" << CurInstr
- << '\t' << *MI);
-
- if (breakDependency) {
- // Check clearance before partial register updates.
- // Call breakDependence before setting LiveRegs[rx].Def.
- unsigned Pref = TII->getPartialRegUpdateClearance(*MI, i, TRI);
- if (Pref && shouldBreakDependence(MI, i, Pref))
- TII->breakPartialRegDependency(*MI, i, TRI);
- }
-
- // How many instructions since rx was last written?
- LiveRegs[rx].Def = CurInstr;
-
- // Kill off domains redefined by generic instructions.
- if (Kill)
- kill(rx);
- }
- }
- ++CurInstr;
-}
-
-/// \brief Break false dependencies on undefined register reads.
-///
-/// Walk the block backward computing precise liveness. This is expensive, so we
-/// only do it on demand. Note that the occurrence of undefined register reads
-/// that should be broken is very rare, but when they occur we may have many in
-/// a single block.
-void ExecutionDepsFix::processUndefReads(MachineBasicBlock *MBB) {
- if (UndefReads.empty())
- return;
-
- // Collect this block's live out register units.
- LiveRegSet.init(*TRI);
- // We do not need to care about pristine registers as they are just preserved
- // but not actually used in the function.
- LiveRegSet.addLiveOutsNoPristines(*MBB);
-
- MachineInstr *UndefMI = UndefReads.back().first;
- unsigned OpIdx = UndefReads.back().second;
-
- for (MachineInstr &I : make_range(MBB->rbegin(), MBB->rend())) {
- // Update liveness, including the current instruction's defs.
- LiveRegSet.stepBackward(I);
-
- if (UndefMI == &I) {
- if (!LiveRegSet.contains(UndefMI->getOperand(OpIdx).getReg()))
- TII->breakPartialRegDependency(*UndefMI, OpIdx, TRI);
-
- UndefReads.pop_back();
- if (UndefReads.empty())
- return;
-
- UndefMI = UndefReads.back().first;
- OpIdx = UndefReads.back().second;
- }
- }
-}
-
-// A hard instruction only works in one domain. All input registers will be
-// forced into that domain.
-void ExecutionDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
- // Collapse all uses.
- for (unsigned i = mi->getDesc().getNumDefs(),
- e = mi->getDesc().getNumOperands(); i != e; ++i) {
- MachineOperand &mo = mi->getOperand(i);
- if (!mo.isReg()) continue;
- for (int rx : regIndices(mo.getReg())) {
- force(rx, domain);
- }
- }
-
- // Kill all defs and force them.
- for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
- MachineOperand &mo = mi->getOperand(i);
- if (!mo.isReg()) continue;
- for (int rx : regIndices(mo.getReg())) {
- kill(rx);
- force(rx, domain);
- }
- }
-}
-
-// A soft instruction can be changed to work in other domains given by mask.
-void ExecutionDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
- // Bitmask of available domains for this instruction after taking collapsed
- // operands into account.
- unsigned available = mask;
-
- // Scan the explicit use operands for incoming domains.
- SmallVector<int, 4> used;
- if (LiveRegs)
- for (unsigned i = mi->getDesc().getNumDefs(),
- e = mi->getDesc().getNumOperands(); i != e; ++i) {
- MachineOperand &mo = mi->getOperand(i);
- if (!mo.isReg()) continue;
- for (int rx : regIndices(mo.getReg())) {
- DomainValue *dv = LiveRegs[rx].Value;
- if (dv == nullptr)
- continue;
- // Bitmask of domains that dv and available have in common.
- unsigned common = dv->getCommonDomains(available);
- // Is it possible to use this collapsed register for free?
- if (dv->isCollapsed()) {
- // Restrict available domains to the ones in common with the operand.
- // If there are no common domains, we must pay the cross-domain
- // penalty for this operand.
- if (common) available = common;
- } else if (common)
- // Open DomainValue is compatible, save it for merging.
- used.push_back(rx);
- else
- // Open DomainValue is not compatible with instruction. It is useless
- // now.
- kill(rx);
- }
- }
-
- // If the collapsed operands force a single domain, propagate the collapse.
- if (isPowerOf2_32(available)) {
- unsigned domain = countTrailingZeros(available);
- TII->setExecutionDomain(*mi, domain);
- visitHardInstr(mi, domain);
- return;
- }
-
- // Kill off any remaining uses that don't match available, and build a list of
- // incoming DomainValues that we want to merge.
- SmallVector<const LiveReg *, 4> Regs;
- for (int rx : used) {
- assert(LiveRegs && "no space allocated for live registers");
- const LiveReg &LR = LiveRegs[rx];
- // This useless DomainValue could have been missed above.
- if (!LR.Value->getCommonDomains(available)) {
- kill(rx);
- continue;
- }
- // Sorted insertion.
- auto I = std::upper_bound(Regs.begin(), Regs.end(), &LR,
- [](const LiveReg *LHS, const LiveReg *RHS) {
- return LHS->Def < RHS->Def;
- });
- Regs.insert(I, &LR);
- }
-
- // doms are now sorted in order of appearance. Try to merge them all, giving
- // priority to the latest ones.
- DomainValue *dv = nullptr;
- while (!Regs.empty()) {
- if (!dv) {
- dv = Regs.pop_back_val()->Value;
- // Force the first dv to match the current instruction.
- dv->AvailableDomains = dv->getCommonDomains(available);
- assert(dv->AvailableDomains && "Domain should have been filtered");
- continue;
- }
-
- DomainValue *Latest = Regs.pop_back_val()->Value;
- // Skip already merged values.
- if (Latest == dv || Latest->Next)
- continue;
- if (merge(dv, Latest))
- continue;
-
- // If latest didn't merge, it is useless now. Kill all registers using it.
- for (int i : used) {
- assert(LiveRegs && "no space allocated for live registers");
- if (LiveRegs[i].Value == Latest)
- kill(i);
- }
- }
-
- // dv is the DomainValue we are going to use for this instruction.
- if (!dv) {
- dv = alloc();
- dv->AvailableDomains = available;
- }
- dv->Instrs.push_back(mi);
-
- // Finally set all defs and non-collapsed uses to dv. We must iterate through
- // all the operators, including imp-def ones.
- for (MachineInstr::mop_iterator ii = mi->operands_begin(),
- ee = mi->operands_end();
- ii != ee; ++ii) {
- MachineOperand &mo = *ii;
- if (!mo.isReg()) continue;
- for (int rx : regIndices(mo.getReg())) {
- if (!LiveRegs[rx].Value || (mo.isDef() && LiveRegs[rx].Value != dv)) {
- kill(rx);
- setLiveReg(rx, dv);
- }
- }
- }
-}
-
-void ExecutionDepsFix::processBasicBlock(MachineBasicBlock *MBB,
- bool PrimaryPass) {
- enterBasicBlock(MBB);
- // If this block is not done, it makes little sense to make any decisions
- // based on clearance information. We need to make a second pass anyway,
- // and by then we'll have better information, so we can avoid doing the work
- // to try and break dependencies now.
- bool breakDependency = isBlockDone(MBB);
- for (MachineInstr &MI : *MBB) {
- if (!MI.isDebugValue()) {
- bool Kill = false;
- if (PrimaryPass)
- Kill = visitInstr(&MI);
- processDefs(&MI, breakDependency, Kill);
- }
- }
- if (breakDependency)
- processUndefReads(MBB);
- leaveBasicBlock(MBB);
-}
-
-bool ExecutionDepsFix::isBlockDone(MachineBasicBlock *MBB) {
- return MBBInfos[MBB].PrimaryCompleted &&
- MBBInfos[MBB].IncomingCompleted == MBBInfos[MBB].PrimaryIncoming &&
- MBBInfos[MBB].IncomingProcessed == MBB->pred_size();
-}
-
-bool ExecutionDepsFix::runOnMachineFunction(MachineFunction &mf) {
- if (skipFunction(mf.getFunction()))
- return false;
- MF = &mf;
- TII = MF->getSubtarget().getInstrInfo();
- TRI = MF->getSubtarget().getRegisterInfo();
- RegClassInfo.runOnMachineFunction(mf);
- LiveRegs = nullptr;
- assert(NumRegs == RC->getNumRegs() && "Bad regclass");
-
- DEBUG(dbgs() << "********** FIX EXECUTION DEPENDENCIES: "
- << TRI->getRegClassName(RC) << " **********\n");
-
- // If no relevant registers are used in the function, we can skip it
- // completely.
- bool anyregs = false;
- const MachineRegisterInfo &MRI = mf.getRegInfo();
- for (unsigned Reg : *RC) {
- if (MRI.isPhysRegUsed(Reg)) {
- anyregs = true;
- break;
- }
- }
- if (!anyregs) return false;
-
- // Initialize the AliasMap on the first use.
- if (AliasMap.empty()) {
- // Given a PhysReg, AliasMap[PhysReg] returns a list of indices into RC and
- // therefore the LiveRegs array.
- AliasMap.resize(TRI->getNumRegs());
- for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i)
- for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true);
- AI.isValid(); ++AI)
- AliasMap[*AI].push_back(i);
- }
-
- // Initialize the MBBInfos
- for (auto &MBB : mf) {
- MBBInfo InitialInfo;
- MBBInfos.insert(std::make_pair(&MBB, InitialInfo));
- }
-
- /*
- * We want to visit every instruction in every basic block in order to update
- * its execution domain or break any false dependencies. However, for the
- * dependency breaking, we need to know clearances from all predecessors
- * (including any backedges). One way to do so would be to do two complete
- * passes over all basic blocks/instructions, the first for recording
- * clearances, the second to break the dependencies. However, for functions
- * without backedges, or functions with a lot of straight-line code, and
- * a small loop, that would be a lot of unnecessary work (since only the
- * BBs that are part of the loop require two passes). As an example,
- * consider the following loop.
- *
- *
- * PH -> A -> B (xmm<Undef> -> xmm<Def>) -> C -> D -> EXIT
- * ^ |
- * +----------------------------------+
- *
- * The iteration order is as follows:
- * Naive: PH A B C D A' B' C' D'
- * Optimized: PH A B C A' B' C' D
- *
- * Note that we avoid processing D twice, because we can entirely process
- * the predecessors before getting to D. We call a block that is ready
- * for its second round of processing `done` (isBlockDone). Once we finish
- * processing some block, we update the counters in MBBInfos and re-process
- * any successors that are now done.
- */
-
- MachineBasicBlock *Entry = &*MF->begin();
- ReversePostOrderTraversal<MachineBasicBlock*> RPOT(Entry);
- SmallVector<MachineBasicBlock *, 4> Workqueue;
- for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
- MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
- MachineBasicBlock *MBB = *MBBI;
- // N.B: IncomingProcessed and IncomingCompleted were already updated while
- // processing this block's predecessors.
- MBBInfos[MBB].PrimaryCompleted = true;
- MBBInfos[MBB].PrimaryIncoming = MBBInfos[MBB].IncomingProcessed;
- bool Primary = true;
- Workqueue.push_back(MBB);
- while (!Workqueue.empty()) {
- MachineBasicBlock *ActiveMBB = &*Workqueue.back();
- Workqueue.pop_back();
- processBasicBlock(ActiveMBB, Primary);
- bool Done = isBlockDone(ActiveMBB);
- for (auto *Succ : ActiveMBB->successors()) {
- if (!isBlockDone(Succ)) {
- if (Primary) {
- MBBInfos[Succ].IncomingProcessed++;
- }
- if (Done) {
- MBBInfos[Succ].IncomingCompleted++;
- }
- if (isBlockDone(Succ)) {
- Workqueue.push_back(Succ);
- }
- }
- }
- Primary = false;
- }
- }
-
- // We need to go through again and finalize any blocks that are not done yet.
- // This is possible if blocks have dead predecessors, so we didn't visit them
- // above.
- for (ReversePostOrderTraversal<MachineBasicBlock *>::rpo_iterator
- MBBI = RPOT.begin(),
- MBBE = RPOT.end();
- MBBI != MBBE; ++MBBI) {
- MachineBasicBlock *MBB = *MBBI;
- if (!isBlockDone(MBB)) {
- processBasicBlock(MBB, false);
- // Don't update successors here. We'll get to them anyway through this
- // loop.
- }
- }
-
- // Clear the LiveOuts vectors and collapse any remaining DomainValues.
- for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
- MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
- auto FI = MBBInfos.find(*MBBI);
- if (FI == MBBInfos.end() || !FI->second.OutRegs)
- continue;
- for (unsigned i = 0, e = NumRegs; i != e; ++i)
- if (FI->second.OutRegs[i].Value)
- release(FI->second.OutRegs[i].Value);
- delete[] FI->second.OutRegs;
- }
- MBBInfos.clear();
- UndefReads.clear();
- Avail.clear();
- Allocator.DestroyAll();
-
- return false;
-}
diff --git a/contrib/llvm/lib/CodeGen/ExecutionDomainFix.cpp b/contrib/llvm/lib/CodeGen/ExecutionDomainFix.cpp
new file mode 100644
index 000000000000..458dcf2b0e26
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ExecutionDomainFix.cpp
@@ -0,0 +1,473 @@
+//===- ExecutionDomainFix.cpp - Fix execution domain issues ----*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ExecutionDomainFix.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "execution-deps-fix"
+
+iterator_range<SmallVectorImpl<int>::const_iterator>
+ExecutionDomainFix::regIndices(unsigned Reg) const {
+ assert(Reg < AliasMap.size() && "Invalid register");
+ const auto &Entry = AliasMap[Reg];
+ return make_range(Entry.begin(), Entry.end());
+}
+
+DomainValue *ExecutionDomainFix::alloc(int domain) {
+ DomainValue *dv = Avail.empty() ? new (Allocator.Allocate()) DomainValue
+ : Avail.pop_back_val();
+ if (domain >= 0)
+ dv->addDomain(domain);
+ assert(dv->Refs == 0 && "Reference count wasn't cleared");
+ assert(!dv->Next && "Chained DomainValue shouldn't have been recycled");
+ return dv;
+}
+
+void ExecutionDomainFix::release(DomainValue *DV) {
+ while (DV) {
+ assert(DV->Refs && "Bad DomainValue");
+ if (--DV->Refs)
+ return;
+
+ // There are no more DV references. Collapse any contained instructions.
+ if (DV->AvailableDomains && !DV->isCollapsed())
+ collapse(DV, DV->getFirstDomain());
+
+ DomainValue *Next = DV->Next;
+ DV->clear();
+ Avail.push_back(DV);
+ // Also release the next DomainValue in the chain.
+ DV = Next;
+ }
+}
+
+DomainValue *ExecutionDomainFix::resolve(DomainValue *&DVRef) {
+ DomainValue *DV = DVRef;
+ if (!DV || !DV->Next)
+ return DV;
+
+ // DV has a chain. Find the end.
+ do
+ DV = DV->Next;
+ while (DV->Next);
+
+ // Update DVRef to point to DV.
+ retain(DV);
+ release(DVRef);
+ DVRef = DV;
+ return DV;
+}
+
+void ExecutionDomainFix::setLiveReg(int rx, DomainValue *dv) {
+ assert(unsigned(rx) < NumRegs && "Invalid index");
+ assert(!LiveRegs.empty() && "Must enter basic block first.");
+
+ if (LiveRegs[rx] == dv)
+ return;
+ if (LiveRegs[rx])
+ release(LiveRegs[rx]);
+ LiveRegs[rx] = retain(dv);
+}
+
+void ExecutionDomainFix::kill(int rx) {
+ assert(unsigned(rx) < NumRegs && "Invalid index");
+ assert(!LiveRegs.empty() && "Must enter basic block first.");
+ if (!LiveRegs[rx])
+ return;
+
+ release(LiveRegs[rx]);
+ LiveRegs[rx] = nullptr;
+}
+
+void ExecutionDomainFix::force(int rx, unsigned domain) {
+ assert(unsigned(rx) < NumRegs && "Invalid index");
+ assert(!LiveRegs.empty() && "Must enter basic block first.");
+ if (DomainValue *dv = LiveRegs[rx]) {
+ if (dv->isCollapsed())
+ dv->addDomain(domain);
+ else if (dv->hasDomain(domain))
+ collapse(dv, domain);
+ else {
+ // This is an incompatible open DomainValue. Collapse it to whatever and
+ // force the new value into domain. This costs a domain crossing.
+ collapse(dv, dv->getFirstDomain());
+ assert(LiveRegs[rx] && "Not live after collapse?");
+ LiveRegs[rx]->addDomain(domain);
+ }
+ } else {
+ // Set up basic collapsed DomainValue.
+ setLiveReg(rx, alloc(domain));
+ }
+}
+
+void ExecutionDomainFix::collapse(DomainValue *dv, unsigned domain) {
+ assert(dv->hasDomain(domain) && "Cannot collapse");
+
+ // Collapse all the instructions.
+ while (!dv->Instrs.empty())
+ TII->setExecutionDomain(*dv->Instrs.pop_back_val(), domain);
+ dv->setSingleDomain(domain);
+
+ // If there are multiple users, give them new, unique DomainValues.
+ if (!LiveRegs.empty() && dv->Refs > 1)
+ for (unsigned rx = 0; rx != NumRegs; ++rx)
+ if (LiveRegs[rx] == dv)
+ setLiveReg(rx, alloc(domain));
+}
+
+bool ExecutionDomainFix::merge(DomainValue *A, DomainValue *B) {
+ assert(!A->isCollapsed() && "Cannot merge into collapsed");
+ assert(!B->isCollapsed() && "Cannot merge from collapsed");
+ if (A == B)
+ return true;
+ // Restrict to the domains that A and B have in common.
+ unsigned common = A->getCommonDomains(B->AvailableDomains);
+ if (!common)
+ return false;
+ A->AvailableDomains = common;
+ A->Instrs.append(B->Instrs.begin(), B->Instrs.end());
+
+ // Clear the old DomainValue so we won't try to swizzle instructions twice.
+ B->clear();
+ // All uses of B are referred to A.
+ B->Next = retain(A);
+
+ for (unsigned rx = 0; rx != NumRegs; ++rx) {
+ assert(!LiveRegs.empty() && "no space allocated for live registers");
+ if (LiveRegs[rx] == B)
+ setLiveReg(rx, A);
+ }
+ return true;
+}
+
+void ExecutionDomainFix::enterBasicBlock(
+ const LoopTraversal::TraversedMBBInfo &TraversedMBB) {
+
+ MachineBasicBlock *MBB = TraversedMBB.MBB;
+
+ // Set up LiveRegs to represent registers entering MBB.
+ // Set default domain values to 'no domain' (nullptr)
+ if (LiveRegs.empty())
+ LiveRegs.assign(NumRegs, nullptr);
+
+ // This is the entry block.
+ if (MBB->pred_empty()) {
+ LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << ": entry\n");
+ return;
+ }
+
+ // Try to coalesce live-out registers from predecessors.
+ for (MachineBasicBlock *pred : MBB->predecessors()) {
+ assert(unsigned(pred->getNumber()) < MBBOutRegsInfos.size() &&
+ "Should have pre-allocated MBBInfos for all MBBs");
+ LiveRegsDVInfo &Incoming = MBBOutRegsInfos[pred->getNumber()];
+ // Incoming is empty if this is a backedge from a BB
+ // we haven't processed yet
+ if (Incoming.empty())
+ continue;
+
+ for (unsigned rx = 0; rx != NumRegs; ++rx) {
+ DomainValue *pdv = resolve(Incoming[rx]);
+ if (!pdv)
+ continue;
+ if (!LiveRegs[rx]) {
+ setLiveReg(rx, pdv);
+ continue;
+ }
+
+ // We have a live DomainValue from more than one predecessor.
+ if (LiveRegs[rx]->isCollapsed()) {
+ // We are already collapsed, but predecessor is not. Force it.
+ unsigned Domain = LiveRegs[rx]->getFirstDomain();
+ if (!pdv->isCollapsed() && pdv->hasDomain(Domain))
+ collapse(pdv, Domain);
+ continue;
+ }
+
+ // Currently open, merge in predecessor.
+ if (!pdv->isCollapsed())
+ merge(LiveRegs[rx], pdv);
+ else
+ force(rx, pdv->getFirstDomain());
+ }
+ }
+ LLVM_DEBUG(dbgs() << printMBBReference(*MBB)
+ << (!TraversedMBB.IsDone ? ": incomplete\n"
+ : ": all preds known\n"));
+}
+
+void ExecutionDomainFix::leaveBasicBlock(
+ const LoopTraversal::TraversedMBBInfo &TraversedMBB) {
+ assert(!LiveRegs.empty() && "Must enter basic block first.");
+ unsigned MBBNumber = TraversedMBB.MBB->getNumber();
+ assert(MBBNumber < MBBOutRegsInfos.size() &&
+ "Unexpected basic block number.");
+ // Save register clearances at end of MBB - used by enterBasicBlock().
+ for (DomainValue *OldLiveReg : MBBOutRegsInfos[MBBNumber]) {
+ release(OldLiveReg);
+ }
+ MBBOutRegsInfos[MBBNumber] = LiveRegs;
+ LiveRegs.clear();
+}
+
+bool ExecutionDomainFix::visitInstr(MachineInstr *MI) {
+ // Update instructions with explicit execution domains.
+ std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(*MI);
+ if (DomP.first) {
+ if (DomP.second)
+ visitSoftInstr(MI, DomP.second);
+ else
+ visitHardInstr(MI, DomP.first);
+ }
+
+ return !DomP.first;
+}
+
+void ExecutionDomainFix::processDefs(MachineInstr *MI, bool Kill) {
+ assert(!MI->isDebugInstr() && "Won't process debug values");
+ const MCInstrDesc &MCID = MI->getDesc();
+ for (unsigned i = 0,
+ e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();
+ i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (MO.isUse())
+ continue;
+ for (int rx : regIndices(MO.getReg())) {
+ // This instruction explicitly defines rx.
+ LLVM_DEBUG(dbgs() << printReg(RC->getRegister(rx), TRI) << ":\t" << *MI);
+
+ // Kill off domains redefined by generic instructions.
+ if (Kill)
+ kill(rx);
+ }
+ }
+}
+
+void ExecutionDomainFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
+ // Collapse all uses.
+ for (unsigned i = mi->getDesc().getNumDefs(),
+ e = mi->getDesc().getNumOperands();
+ i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg())
+ continue;
+ for (int rx : regIndices(mo.getReg())) {
+ force(rx, domain);
+ }
+ }
+
+ // Kill all defs and force them.
+ for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg())
+ continue;
+ for (int rx : regIndices(mo.getReg())) {
+ kill(rx);
+ force(rx, domain);
+ }
+ }
+}
+
+void ExecutionDomainFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
+ // Bitmask of available domains for this instruction after taking collapsed
+ // operands into account.
+ unsigned available = mask;
+
+ // Scan the explicit use operands for incoming domains.
+ SmallVector<int, 4> used;
+ if (!LiveRegs.empty())
+ for (unsigned i = mi->getDesc().getNumDefs(),
+ e = mi->getDesc().getNumOperands();
+ i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg())
+ continue;
+ for (int rx : regIndices(mo.getReg())) {
+ DomainValue *dv = LiveRegs[rx];
+ if (dv == nullptr)
+ continue;
+ // Bitmask of domains that dv and available have in common.
+ unsigned common = dv->getCommonDomains(available);
+ // Is it possible to use this collapsed register for free?
+ if (dv->isCollapsed()) {
+ // Restrict available domains to the ones in common with the operand.
+ // If there are no common domains, we must pay the cross-domain
+ // penalty for this operand.
+ if (common)
+ available = common;
+ } else if (common)
+ // Open DomainValue is compatible, save it for merging.
+ used.push_back(rx);
+ else
+ // Open DomainValue is not compatible with instruction. It is useless
+ // now.
+ kill(rx);
+ }
+ }
+
+ // If the collapsed operands force a single domain, propagate the collapse.
+ if (isPowerOf2_32(available)) {
+ unsigned domain = countTrailingZeros(available);
+ TII->setExecutionDomain(*mi, domain);
+ visitHardInstr(mi, domain);
+ return;
+ }
+
+ // Kill off any remaining uses that don't match available, and build a list of
+ // incoming DomainValues that we want to merge.
+ SmallVector<int, 4> Regs;
+ for (int rx : used) {
+ assert(!LiveRegs.empty() && "no space allocated for live registers");
+ DomainValue *&LR = LiveRegs[rx];
+ // This useless DomainValue could have been missed above.
+ if (!LR->getCommonDomains(available)) {
+ kill(rx);
+ continue;
+ }
+ // Sorted insertion.
+ // Enables giving priority to the latest domains during merging.
+ auto I = std::upper_bound(
+ Regs.begin(), Regs.end(), rx, [&](int LHS, const int RHS) {
+ return RDA->getReachingDef(mi, RC->getRegister(LHS)) <
+ RDA->getReachingDef(mi, RC->getRegister(RHS));
+ });
+ Regs.insert(I, rx);
+ }
+
+ // doms are now sorted in order of appearance. Try to merge them all, giving
+ // priority to the latest ones.
+ DomainValue *dv = nullptr;
+ while (!Regs.empty()) {
+ if (!dv) {
+ dv = LiveRegs[Regs.pop_back_val()];
+ // Force the first dv to match the current instruction.
+ dv->AvailableDomains = dv->getCommonDomains(available);
+ assert(dv->AvailableDomains && "Domain should have been filtered");
+ continue;
+ }
+
+ DomainValue *Latest = LiveRegs[Regs.pop_back_val()];
+ // Skip already merged values.
+ if (Latest == dv || Latest->Next)
+ continue;
+ if (merge(dv, Latest))
+ continue;
+
+ // If latest didn't merge, it is useless now. Kill all registers using it.
+ for (int i : used) {
+ assert(!LiveRegs.empty() && "no space allocated for live registers");
+ if (LiveRegs[i] == Latest)
+ kill(i);
+ }
+ }
+
+ // dv is the DomainValue we are going to use for this instruction.
+ if (!dv) {
+ dv = alloc();
+ dv->AvailableDomains = available;
+ }
+ dv->Instrs.push_back(mi);
+
+ // Finally set all defs and non-collapsed uses to dv. We must iterate through
+ // all the operands, including imp-def ones.
+ for (MachineOperand &mo : mi->operands()) {
+ if (!mo.isReg())
+ continue;
+ for (int rx : regIndices(mo.getReg())) {
+ if (!LiveRegs[rx] || (mo.isDef() && LiveRegs[rx] != dv)) {
+ kill(rx);
+ setLiveReg(rx, dv);
+ }
+ }
+ }
+}
+
+void ExecutionDomainFix::processBasicBlock(
+ const LoopTraversal::TraversedMBBInfo &TraversedMBB) {
+ enterBasicBlock(TraversedMBB);
+ // If this block is not done, it makes little sense to make any decisions
+ // based on clearance information. We need to make a second pass anyway,
+ // and by then we'll have better information, so we can avoid doing the work
+ // to try and break dependencies now.
+ for (MachineInstr &MI : *TraversedMBB.MBB) {
+ if (!MI.isDebugInstr()) {
+ bool Kill = false;
+ if (TraversedMBB.PrimaryPass)
+ Kill = visitInstr(&MI);
+ processDefs(&MI, Kill);
+ }
+ }
+ leaveBasicBlock(TraversedMBB);
+}
+
+bool ExecutionDomainFix::runOnMachineFunction(MachineFunction &mf) {
+ if (skipFunction(mf.getFunction()))
+ return false;
+ MF = &mf;
+ TII = MF->getSubtarget().getInstrInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
+ LiveRegs.clear();
+ assert(NumRegs == RC->getNumRegs() && "Bad regclass");
+
+ LLVM_DEBUG(dbgs() << "********** FIX EXECUTION DOMAIN: "
+ << TRI->getRegClassName(RC) << " **********\n");
+
+ // If no relevant registers are used in the function, we can skip it
+ // completely.
+ bool anyregs = false;
+ const MachineRegisterInfo &MRI = mf.getRegInfo();
+ for (unsigned Reg : *RC) {
+ if (MRI.isPhysRegUsed(Reg)) {
+ anyregs = true;
+ break;
+ }
+ }
+ if (!anyregs)
+ return false;
+
+ RDA = &getAnalysis<ReachingDefAnalysis>();
+
+ // Initialize the AliasMap on the first use.
+ if (AliasMap.empty()) {
+ // Given a PhysReg, AliasMap[PhysReg] returns a list of indices into RC and
+ // therefore the LiveRegs array.
+ AliasMap.resize(TRI->getNumRegs());
+ for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i)
+ for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true); AI.isValid();
+ ++AI)
+ AliasMap[*AI].push_back(i);
+ }
+
+ // Initialize the MBBOutRegsInfos
+ MBBOutRegsInfos.resize(mf.getNumBlockIDs());
+
+ // Traverse the basic blocks.
+ LoopTraversal Traversal;
+ LoopTraversal::TraversalOrder TraversedMBBOrder = Traversal.traverse(mf);
+ for (LoopTraversal::TraversedMBBInfo TraversedMBB : TraversedMBBOrder) {
+ processBasicBlock(TraversedMBB);
+ }
+
+ for (LiveRegsDVInfo OutLiveRegs : MBBOutRegsInfos) {
+ for (DomainValue *OutLiveReg : OutLiveRegs) {
+ if (OutLiveReg)
+ release(OutLiveReg);
+ }
+ }
+ MBBOutRegsInfos.clear();
+ Avail.clear();
+ Allocator.DestroyAll();
+
+ return false;
+}
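The deleted ExecutionDepsFix pass hand-rolled its two-pass block traversal; this new file delegates that to LoopTraversal and pulls in ReachingDefAnalysis, while the undef-read and clearance handling the old file carried no longer lives here. A minimal sketch of how a pass consumes the traversal, using only the fields visible in this file (MBB, PrimaryPass, IsDone); the function name is invented.

    #include "llvm/CodeGen/LoopTraversal.h"
    #include "llvm/CodeGen/MachineFunction.h"

    // Sketch only; a real pass would keep per-block state like MBBOutRegsInfos.
    static void walkInLoopAwareOrder(llvm::MachineFunction &MF) {
      llvm::LoopTraversal Traversal;
      llvm::LoopTraversal::TraversalOrder Order = Traversal.traverse(MF);
      for (const llvm::LoopTraversal::TraversedMBBInfo &Info : Order) {
        if (Info.PrimaryPass) {
          // First visit of Info.MBB: gather facts (here, execution domains).
        }
        if (Info.IsDone) {
          // All predecessor results are known; final decisions are safe now.
        }
      }
    }

Blocks inside loops appear twice in the order (once per pass), which is why leaveBasicBlock above releases the previous visit's DomainValues before overwriting MBBOutRegsInfos.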
diff --git a/contrib/llvm/lib/CodeGen/ExpandMemCmp.cpp b/contrib/llvm/lib/CodeGen/ExpandMemCmp.cpp
index 09c808463a41..d7562cbf1e90 100644
--- a/contrib/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/contrib/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -32,7 +32,7 @@ STATISTIC(NumMemCmpGreaterThanMax,
"Number of memcmp calls with size greater than max size");
STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls");
-static cl::opt<unsigned> MemCmpNumLoadsPerBlock(
+static cl::opt<unsigned> MemCmpEqZeroNumLoadsPerBlock(
"memcmp-num-loads-per-block", cl::Hidden, cl::init(1),
cl::desc("The number of loads per basic block for inline expansion of "
"memcmp that is only being compared against zero."));
@@ -56,7 +56,7 @@ class MemCmpExpansion {
const uint64_t Size;
unsigned MaxLoadSize;
uint64_t NumLoadsNonOneByte;
- const uint64_t NumLoadsPerBlock;
+ const uint64_t NumLoadsPerBlockForZeroCmp;
std::vector<BasicBlock *> LoadCmpBlocks;
BasicBlock *EndBlock;
PHINode *PhiRes;
@@ -102,7 +102,7 @@ class MemCmpExpansion {
MemCmpExpansion(CallInst *CI, uint64_t Size,
const TargetTransformInfo::MemCmpExpansionOptions &Options,
unsigned MaxNumLoads, const bool IsUsedForZeroCmp,
- unsigned NumLoadsPerBlock, const DataLayout &DL);
+ unsigned MaxLoadsPerBlockForZeroCmp, const DataLayout &TheDataLayout);
unsigned getNumBlocks();
uint64_t getNumLoads() const { return LoadSequence.size(); }
@@ -122,12 +122,12 @@ MemCmpExpansion::MemCmpExpansion(
CallInst *const CI, uint64_t Size,
const TargetTransformInfo::MemCmpExpansionOptions &Options,
const unsigned MaxNumLoads, const bool IsUsedForZeroCmp,
- const unsigned NumLoadsPerBlock, const DataLayout &TheDataLayout)
+ const unsigned MaxLoadsPerBlockForZeroCmp, const DataLayout &TheDataLayout)
: CI(CI),
Size(Size),
MaxLoadSize(0),
NumLoadsNonOneByte(0),
- NumLoadsPerBlock(NumLoadsPerBlock),
+ NumLoadsPerBlockForZeroCmp(MaxLoadsPerBlockForZeroCmp),
IsUsedForZeroCmp(IsUsedForZeroCmp),
DL(TheDataLayout),
Builder(CI) {
@@ -171,8 +171,8 @@ MemCmpExpansion::MemCmpExpansion(
unsigned MemCmpExpansion::getNumBlocks() {
if (IsUsedForZeroCmp)
- return getNumLoads() / NumLoadsPerBlock +
- (getNumLoads() % NumLoadsPerBlock != 0 ? 1 : 0);
+ return getNumLoads() / NumLoadsPerBlockForZeroCmp +
+ (getNumLoads() % NumLoadsPerBlockForZeroCmp != 0 ? 1 : 0);
return getNumLoads();
}
@@ -249,7 +249,7 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,
Value *Diff;
const unsigned NumLoads =
- std::min(getNumLoads() - LoadIndex, NumLoadsPerBlock);
+ std::min(getNumLoads() - LoadIndex, NumLoadsPerBlockForZeroCmp);
// For a single-block expansion, start inserting before the memcmp call.
if (LoadCmpBlocks.empty())
@@ -519,8 +519,6 @@ Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() {
/// A memcmp expansion that only has one block of load and compare can bypass
/// the compare, branch, and phi IR that is required in the general case.
Value *MemCmpExpansion::getMemCmpOneBlock() {
- assert(NumLoadsPerBlock == 1 && "Only handles one load pair per block");
-
Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8);
Value *Source1 = CI->getArgOperand(0);
Value *Source2 = CI->getArgOperand(1);
@@ -566,11 +564,8 @@ Value *MemCmpExpansion::getMemCmpOneBlock() {
// This function expands the memcmp call into an inline expansion and returns
// the memcmp result.
Value *MemCmpExpansion::getMemCmpExpansion() {
- // A memcmp with zero-comparison with only one block of load and compare does
- // not need to set up any extra blocks. This case could be handled in the DAG,
- // but since we have all of the machinery to flexibly expand any memcpy here,
- // we choose to handle this case too to avoid fragmented lowering.
- if ((!IsUsedForZeroCmp && NumLoadsPerBlock != 1) || getNumBlocks() != 1) {
+ // Create the basic block framework for a multi-block expansion.
+ if (getNumBlocks() != 1) {
BasicBlock *StartBlock = CI->getParent();
EndBlock = StartBlock->splitBasicBlock(CI, "endblock");
setupEndBlockPHINodes();
@@ -596,8 +591,8 @@ Value *MemCmpExpansion::getMemCmpExpansion() {
return getNumBlocks() == 1 ? getMemCmpEqZeroOneBlock()
: getMemCmpExpansionZeroCase();
- // TODO: Handle more than one load pair per block in getMemCmpOneBlock().
- if (getNumBlocks() == 1 && NumLoadsPerBlock == 1) return getMemCmpOneBlock();
+ if (getNumBlocks() == 1)
+ return getMemCmpOneBlock();
for (unsigned I = 0; I < getNumBlocks(); ++I) {
emitLoadCompareBlock(I);
@@ -709,8 +704,12 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
const unsigned MaxNumLoads =
TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize());
+ unsigned NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences()
+ ? MemCmpEqZeroNumLoadsPerBlock
+ : TLI->getMemcmpEqZeroLoadsPerBlock();
+
MemCmpExpansion Expansion(CI, SizeVal, *Options, MaxNumLoads,
- IsUsedForZeroCmp, MemCmpNumLoadsPerBlock, *DL);
+ IsUsedForZeroCmp, NumLoadsPerBlock, *DL);
// Don't expand if this will require more loads than desired by the target.
if (Expansion.getNumLoads() == 0) {
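In the eq-zero expansion above, several load/compare pairs share one basic block, so getNumBlocks() reduces to a ceiling division; a small sketch of that arithmetic, with illustrative names:

#include <cstdint>

// One block per LoadsPerBlock load pairs, rounding up; the general (ordered)
// comparison instead gives every load pair its own block.
uint64_t numBlocksForZeroCmp(uint64_t NumLoads, uint64_t LoadsPerBlock) {
  return NumLoads / LoadsPerBlock + (NumLoads % LoadsPerBlock != 0 ? 1 : 0);
}
// e.g. 5 loads at 2 per block -> 3 blocks.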
diff --git a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
index 6ef97d6dd5ec..bc747fc610f8 100644
--- a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -93,11 +93,11 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
assert(TargetRegisterInfo::isPhysicalRegister(InsReg) &&
"Inserted value must be in a physical register");
- DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
+ LLVM_DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
if (MI->allDefsAreDead()) {
MI->setDesc(TII->get(TargetOpcode::KILL));
- DEBUG(dbgs() << "subreg: replaced by: " << *MI);
+ LLVM_DEBUG(dbgs() << "subreg: replaced by: " << *MI);
return true;
}
@@ -110,10 +110,10 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
MI->setDesc(TII->get(TargetOpcode::KILL));
MI->RemoveOperand(3); // SubIdx
MI->RemoveOperand(1); // Imm
- DEBUG(dbgs() << "subreg: replace by: " << *MI);
+ LLVM_DEBUG(dbgs() << "subreg: replace by: " << *MI);
return true;
}
- DEBUG(dbgs() << "subreg: eliminated!");
+ LLVM_DEBUG(dbgs() << "subreg: eliminated!");
} else {
TII->copyPhysReg(*MBB, MI, MI->getDebugLoc(), DstSubReg, InsReg,
MI->getOperand(2).isKill());
@@ -122,10 +122,10 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
MachineBasicBlock::iterator CopyMI = MI;
--CopyMI;
CopyMI->addRegisterDefined(DstReg);
- DEBUG(dbgs() << "subreg: " << *CopyMI);
+ LLVM_DEBUG(dbgs() << "subreg: " << *CopyMI);
}
- DEBUG(dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << '\n');
MBB->erase(MI);
return true;
}
@@ -133,9 +133,9 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
bool ExpandPostRA::LowerCopy(MachineInstr *MI) {
if (MI->allDefsAreDead()) {
- DEBUG(dbgs() << "dead copy: " << *MI);
+ LLVM_DEBUG(dbgs() << "dead copy: " << *MI);
MI->setDesc(TII->get(TargetOpcode::KILL));
- DEBUG(dbgs() << "replaced by: " << *MI);
+ LLVM_DEBUG(dbgs() << "replaced by: " << *MI);
return true;
}
@@ -144,14 +144,15 @@ bool ExpandPostRA::LowerCopy(MachineInstr *MI) {
bool IdentityCopy = (SrcMO.getReg() == DstMO.getReg());
if (IdentityCopy || SrcMO.isUndef()) {
- DEBUG(dbgs() << (IdentityCopy ? "identity copy: " : "undef copy: ") << *MI);
+ LLVM_DEBUG(dbgs() << (IdentityCopy ? "identity copy: " : "undef copy: ")
+ << *MI);
// No need to insert an identity copy instruction, but replace with a KILL
// if liveness is changed.
if (SrcMO.isUndef() || MI->getNumOperands() > 2) {
// We must make sure the super-register gets killed. Replace the
// instruction with KILL.
MI->setDesc(TII->get(TargetOpcode::KILL));
- DEBUG(dbgs() << "replaced by: " << *MI);
+ LLVM_DEBUG(dbgs() << "replaced by: " << *MI);
return true;
}
// Vanilla identity copy.
@@ -159,13 +160,13 @@ bool ExpandPostRA::LowerCopy(MachineInstr *MI) {
return true;
}
- DEBUG(dbgs() << "real copy: " << *MI);
+ LLVM_DEBUG(dbgs() << "real copy: " << *MI);
TII->copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(),
DstMO.getReg(), SrcMO.getReg(), SrcMO.isKill());
if (MI->getNumOperands() > 2)
TransferImplicitOperands(MI);
- DEBUG({
+ LLVM_DEBUG({
MachineBasicBlock::iterator dMI = MI;
dbgs() << "replaced by: " << *(--dMI);
});
@@ -177,9 +178,9 @@ bool ExpandPostRA::LowerCopy(MachineInstr *MI) {
/// copies.
///
bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) {
- DEBUG(dbgs() << "Machine Function\n"
- << "********** EXPANDING POST-RA PSEUDO INSTRS **********\n"
- << "********** Function: " << MF.getName() << '\n');
+ LLVM_DEBUG(dbgs() << "Machine Function\n"
+ << "********** EXPANDING POST-RA PSEUDO INSTRS **********\n"
+ << "********** Function: " << MF.getName() << '\n');
TRI = MF.getSubtarget().getRegisterInfo();
TII = MF.getSubtarget().getInstrInfo();
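The DEBUG -> LLVM_DEBUG conversions here (and in the files below) use the macro from llvm/Support/Debug.h: the body is compiled out of release builds and, in assert-enabled builds, prints only under -debug or -debug-only=<DEBUG_TYPE>. A small sketch of the pattern; the helper function is hypothetical and the DEBUG_TYPE value is assumed to match this pass:

#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "postrapseudos"

// Prints only in assert-enabled builds run with -debug or
// -debug-only=postrapseudos.
static void traceLowering(unsigned Opcode) {
  LLVM_DEBUG(llvm::dbgs() << "lowering pseudo opcode " << Opcode << '\n');
}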
diff --git a/contrib/llvm/lib/CodeGen/ExpandReductions.cpp b/contrib/llvm/lib/CodeGen/ExpandReductions.cpp
index abf487a4f198..7552ba8cd85d 100644
--- a/contrib/llvm/lib/CodeGen/ExpandReductions.cpp
+++ b/contrib/llvm/lib/CodeGen/ExpandReductions.cpp
@@ -78,13 +78,15 @@ RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) {
bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
bool Changed = false;
- SmallVector<IntrinsicInst*, 4> Worklist;
+ SmallVector<IntrinsicInst *, 4> Worklist;
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
if (auto II = dyn_cast<IntrinsicInst>(&*I))
Worklist.push_back(II);
for (auto *II : Worklist) {
IRBuilder<> Builder(II);
+ bool IsOrdered = false;
+ Value *Acc = nullptr;
Value *Vec = nullptr;
auto ID = II->getIntrinsicID();
auto MRK = RecurrenceDescriptor::MRK_Invalid;
@@ -92,11 +94,10 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
case Intrinsic::experimental_vector_reduce_fadd:
case Intrinsic::experimental_vector_reduce_fmul:
// FMFs must be attached to the call, otherwise it's an ordered reduction
- // and it can't be handled by generating this shuffle sequence.
- // TODO: Implement scalarization of ordered reductions here for targets
- // without native support.
+ // and it can't be handled by generating a shuffle sequence.
if (!II->getFastMathFlags().isFast())
- continue;
+ IsOrdered = true;
+ Acc = II->getArgOperand(0);
Vec = II->getArgOperand(1);
break;
case Intrinsic::experimental_vector_reduce_add:
@@ -118,7 +119,9 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
}
if (!TTI->shouldExpandReduction(II))
continue;
- auto Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
+ Value *Rdx =
+ IsOrdered ? getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK)
+ : getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
II->replaceAllUsesWith(Rdx);
II->eraseFromParent();
Changed = true;
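With the change above, a floating-point reduction without fast-math flags is no longer skipped: it is scalarized as an ordered, lane-by-lane fold seeded with the accumulator operand, while the fast-math path keeps the pairwise shuffle tree. A standalone sketch of the two shapes in plain C++ (not the LoopUtils helpers):

#include <vector>

// Pairwise halving, the shape getShuffleReduction() lowers to.
// Assumes a non-empty, power-of-two element count.
double shuffleStyleReduce(std::vector<double> V) {
  for (size_t Half = V.size() / 2; Half >= 1; Half /= 2)
    for (size_t I = 0; I < Half; ++I)
      V[I] += V[I + Half];
  return V[0];
}

// Sequential fold, the shape getOrderedReduction() lowers to; evaluation
// order (and thus rounding) matches the scalar loop.
double orderedReduce(double Acc, const std::vector<double> &V) {
  for (double X : V)
    Acc += X;
  return Acc;
}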
diff --git a/contrib/llvm/lib/CodeGen/FaultMaps.cpp b/contrib/llvm/lib/CodeGen/FaultMaps.cpp
index 2924b011e0c1..361558a0e562 100644
--- a/contrib/llvm/lib/CodeGen/FaultMaps.cpp
+++ b/contrib/llvm/lib/CodeGen/FaultMaps.cpp
@@ -62,17 +62,17 @@ void FaultMaps::serializeToFaultMapSection() {
// Emit a dummy symbol to force section inclusion.
OS.EmitLabel(OutContext.getOrCreateSymbol(Twine("__LLVM_FaultMaps")));
- DEBUG(dbgs() << "********** Fault Map Output **********\n");
+ LLVM_DEBUG(dbgs() << "********** Fault Map Output **********\n");
// Header
OS.EmitIntValue(FaultMapVersion, 1); // Version.
OS.EmitIntValue(0, 1); // Reserved.
OS.EmitIntValue(0, 2); // Reserved.
- DEBUG(dbgs() << WFMP << "#functions = " << FunctionInfos.size() << "\n");
+ LLVM_DEBUG(dbgs() << WFMP << "#functions = " << FunctionInfos.size() << "\n");
OS.EmitIntValue(FunctionInfos.size(), 4);
- DEBUG(dbgs() << WFMP << "functions:\n");
+ LLVM_DEBUG(dbgs() << WFMP << "functions:\n");
for (const auto &FFI : FunctionInfos)
emitFunctionInfo(FFI.first, FFI.second);
@@ -82,25 +82,25 @@ void FaultMaps::emitFunctionInfo(const MCSymbol *FnLabel,
const FunctionFaultInfos &FFI) {
MCStreamer &OS = *AP.OutStreamer;
- DEBUG(dbgs() << WFMP << " function addr: " << *FnLabel << "\n");
+ LLVM_DEBUG(dbgs() << WFMP << " function addr: " << *FnLabel << "\n");
OS.EmitSymbolValue(FnLabel, 8);
- DEBUG(dbgs() << WFMP << " #faulting PCs: " << FFI.size() << "\n");
+ LLVM_DEBUG(dbgs() << WFMP << " #faulting PCs: " << FFI.size() << "\n");
OS.EmitIntValue(FFI.size(), 4);
OS.EmitIntValue(0, 4); // Reserved
for (auto &Fault : FFI) {
- DEBUG(dbgs() << WFMP << " fault type: "
- << faultTypeToString(Fault.Kind) << "\n");
+ LLVM_DEBUG(dbgs() << WFMP << " fault type: "
+ << faultTypeToString(Fault.Kind) << "\n");
OS.EmitIntValue(Fault.Kind, 4);
- DEBUG(dbgs() << WFMP << " faulting PC offset: "
- << *Fault.FaultingOffsetExpr << "\n");
+ LLVM_DEBUG(dbgs() << WFMP << " faulting PC offset: "
+ << *Fault.FaultingOffsetExpr << "\n");
OS.EmitValue(Fault.FaultingOffsetExpr, 4);
- DEBUG(dbgs() << WFMP << " fault handler PC offset: "
- << *Fault.HandlerOffsetExpr << "\n");
+ LLVM_DEBUG(dbgs() << WFMP << " fault handler PC offset: "
+ << *Fault.HandlerOffsetExpr << "\n");
OS.EmitValue(Fault.HandlerOffsetExpr, 4);
}
}
diff --git a/contrib/llvm/lib/CodeGen/FuncletLayout.cpp b/contrib/llvm/lib/CodeGen/FuncletLayout.cpp
index 9c71b18619a1..581cd423f2d4 100644
--- a/contrib/llvm/lib/CodeGen/FuncletLayout.cpp
+++ b/contrib/llvm/lib/CodeGen/FuncletLayout.cpp
@@ -41,8 +41,11 @@ INITIALIZE_PASS(FuncletLayout, DEBUG_TYPE,
"Contiguously Lay Out Funclets", false, false)
bool FuncletLayout::runOnMachineFunction(MachineFunction &F) {
+ // Even though this gets information from getEHScopeMembership(), this pass is
+ // only necessary for funclet-based EH personalities, in which these EH scopes
+ // are outlined at the end.
DenseMap<const MachineBasicBlock *, int> FuncletMembership =
- getFuncletMembership(F);
+ getEHScopeMembership(F);
if (FuncletMembership.empty())
return false;
diff --git a/contrib/llvm/lib/CodeGen/GCRootLowering.cpp b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp
index 4361d8b248c8..31ddeadbd97a 100644
--- a/contrib/llvm/lib/CodeGen/GCRootLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp
@@ -38,7 +38,7 @@ namespace {
/// directed by the GCStrategy. It also performs automatic root initialization
/// and custom intrinsic lowering.
class LowerIntrinsics : public FunctionPass {
- bool PerformDefaultLowering(Function &F, GCStrategy &Coll);
+ bool PerformDefaultLowering(Function &F, GCStrategy &S);
public:
static char ID;
@@ -61,7 +61,7 @@ class GCMachineCodeAnalysis : public MachineFunctionPass {
const TargetInstrInfo *TII;
void FindSafePoints(MachineFunction &MF);
- void VisitCallPoint(MachineBasicBlock::iterator MI);
+ void VisitCallPoint(MachineBasicBlock::iterator CI);
MCSymbol *InsertLabel(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const DebugLoc &DL) const;
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 114c068749eb..07de31bec660 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -38,6 +38,9 @@ bool CallLowering::lowerCall(
ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{},
i < NumFixedArgs};
setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CS);
+ // We don't currently support swifterror or swiftself args.
+ if (OrigArg.Flags.isSwiftError() || OrigArg.Flags.isSwiftSelf())
+ return false;
OrigArgs.push_back(OrigArg);
++i;
}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
new file mode 100644
index 000000000000..0bc5b87de150
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -0,0 +1,81 @@
+//===-- lib/CodeGen/GlobalISel/Combiner.cpp -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains common code to combine machine functions at the generic
+// level.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/GlobalISel/GISelWorkList.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "gi-combiner"
+
+using namespace llvm;
+
+Combiner::Combiner(CombinerInfo &Info, const TargetPassConfig *TPC)
+ : CInfo(Info), TPC(TPC) {
+ (void)this->TPC; // FIXME: Remove when used.
+}
+
+bool Combiner::combineMachineInstrs(MachineFunction &MF) {
+ // If the ISel pipeline failed, do not bother running this pass.
+ // FIXME: Should this be here or in individual combiner passes?
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+
+ MRI = &MF.getRegInfo();
+ Builder.setMF(MF);
+
+ LLVM_DEBUG(dbgs() << "Generic MI Combiner for: " << MF.getName() << '\n');
+
+ MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr);
+
+ bool MFChanged = false;
+ bool Changed;
+
+ do {
+ // Collect all instructions. Do a post-order traversal over basic blocks and
+ // insert into the list bottom-up, so that popping with pop_back_val walks
+ // the function in RPO order, top-down within each block.
+ Changed = false;
+ GISelWorkList<512> WorkList;
+ for (MachineBasicBlock *MBB : post_order(&MF)) {
+ if (MBB->empty())
+ continue;
+ for (auto MII = MBB->rbegin(), MIE = MBB->rend(); MII != MIE;) {
+ MachineInstr *CurMI = &*MII;
+ ++MII;
+ // Erase dead insts before even adding to the list.
+ if (isTriviallyDead(*CurMI, *MRI)) {
+ LLVM_DEBUG(dbgs() << *CurMI << "Is dead; erasing.\n");
+ CurMI->eraseFromParentAndMarkDBGValuesForRemoval();
+ continue;
+ }
+ WorkList.insert(CurMI);
+ }
+ }
+ // Main Loop. Process the instructions here.
+ while (!WorkList.empty()) {
+ MachineInstr *CurrInst = WorkList.pop_back_val();
+ LLVM_DEBUG(dbgs() << "Try combining " << *CurrInst << "\n";);
+ Changed |= CInfo.combine(*CurrInst, Builder);
+ }
+ MFChanged |= Changed;
+ } while (Changed);
+
+ return MFChanged;
+}
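The worklist order above is what makes the driver walk instructions top-down: blocks are visited in post-order and instructions pushed bottom-up, so popping from the back yields a reverse-post-order walk over blocks with top-down order inside each block. A standalone sketch of that ordering with simple stand-in types (not MachineIR):

#include <string>
#include <vector>

struct Block { std::vector<std::string> Insts; };

std::vector<std::string> rpoVisitOrder(const std::vector<Block *> &PostOrder) {
  std::vector<std::string> WorkList;
  for (const Block *B : PostOrder)              // post-order over blocks
    for (auto It = B->Insts.rbegin(); It != B->Insts.rend(); ++It)
      WorkList.push_back(*It);                  // bottom-up within each block

  std::vector<std::string> Visit;
  while (!WorkList.empty()) {                   // pop_back_val equivalent
    Visit.push_back(WorkList.back());
    WorkList.pop_back();
  }
  return Visit;  // RPO over blocks, top-down within each block
}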
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
new file mode 100644
index 000000000000..44e904a6391b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -0,0 +1,41 @@
+//===-- lib/CodeGen/GlobalISel/CombinerHelper.cpp ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+#define DEBUG_TYPE "gi-combine"
+
+using namespace llvm;
+
+CombinerHelper::CombinerHelper(MachineIRBuilder &B) :
+ Builder(B), MRI(Builder.getMF().getRegInfo()) {}
+
+bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
+ if (MI.getOpcode() != TargetOpcode::COPY)
+ return false;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT SrcTy = MRI.getType(SrcReg);
+ // Simple Copy Propagation.
+ // a(sx) = COPY b(sx) -> Replace all uses of a with b.
+ if (DstTy.isValid() && SrcTy.isValid() && DstTy == SrcTy) {
+ MI.eraseFromParent();
+ MRI.replaceRegWith(DstReg, SrcReg);
+ return true;
+ }
+ return false;
+}
+
+bool CombinerHelper::tryCombine(MachineInstr &MI) {
+ return tryCombineCopy(MI);
+}
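The only combine implemented so far is the copy propagation in tryCombineCopy(): when a COPY's source and destination carry the same valid type, every use of the destination is redirected to the source and the COPY is erased. A standalone sketch of that rule with plain stand-ins for vregs and LLTs:

#include <vector>

using Reg = unsigned;

struct Copy { Reg Dst, Src; int DstTy, SrcTy; };   // -1 models an invalid LLT

bool propagateCopy(const Copy &C, std::vector<Reg> &UseRegs) {
  if (C.DstTy < 0 || C.SrcTy < 0 || C.DstTy != C.SrcTy)
    return false;                 // both types must be valid and identical
  for (Reg &U : UseRegs)          // replaceRegWith(Dst, Src) equivalent
    if (U == C.Dst)
      U = C.Src;
  return true;                    // the COPY itself can now be erased
}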
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index a329a71e2c95..bafb7a05536d 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/StackProtector.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -102,37 +103,103 @@ IRTranslator::IRTranslator() : MachineFunctionPass(ID) {
}
void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<StackProtector>();
AU.addRequired<TargetPassConfig>();
+ getSelectionDAGFallbackAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
}
-unsigned IRTranslator::getOrCreateVReg(const Value &Val) {
- unsigned &ValReg = ValToVReg[&Val];
+static void computeValueLLTs(const DataLayout &DL, Type &Ty,
+ SmallVectorImpl<LLT> &ValueTys,
+ SmallVectorImpl<uint64_t> *Offsets = nullptr,
+ uint64_t StartingOffset = 0) {
+ // Given a struct type, recursively traverse the elements.
+ if (StructType *STy = dyn_cast<StructType>(&Ty)) {
+ const StructLayout *SL = DL.getStructLayout(STy);
+ for (unsigned I = 0, E = STy->getNumElements(); I != E; ++I)
+ computeValueLLTs(DL, *STy->getElementType(I), ValueTys, Offsets,
+ StartingOffset + SL->getElementOffset(I));
+ return;
+ }
+ // Given an array type, recursively traverse the elements.
+ if (ArrayType *ATy = dyn_cast<ArrayType>(&Ty)) {
+ Type *EltTy = ATy->getElementType();
+ uint64_t EltSize = DL.getTypeAllocSize(EltTy);
+ for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
+ computeValueLLTs(DL, *EltTy, ValueTys, Offsets,
+ StartingOffset + i * EltSize);
+ return;
+ }
+ // Interpret void as zero return values.
+ if (Ty.isVoidTy())
+ return;
+ // Base case: we can get an LLT for this LLVM IR type.
+ ValueTys.push_back(getLLTForType(Ty, DL));
+ if (Offsets != nullptr)
+ Offsets->push_back(StartingOffset * 8);
+}
+
+IRTranslator::ValueToVRegInfo::VRegListT &
+IRTranslator::allocateVRegs(const Value &Val) {
+ assert(!VMap.contains(Val) && "Value already allocated in VMap");
+ auto *Regs = VMap.getVRegs(Val);
+ auto *Offsets = VMap.getOffsets(Val);
+ SmallVector<LLT, 4> SplitTys;
+ computeValueLLTs(*DL, *Val.getType(), SplitTys,
+ Offsets->empty() ? Offsets : nullptr);
+ for (unsigned i = 0; i < SplitTys.size(); ++i)
+ Regs->push_back(0);
+ return *Regs;
+}
+
+ArrayRef<unsigned> IRTranslator::getOrCreateVRegs(const Value &Val) {
+ auto VRegsIt = VMap.findVRegs(Val);
+ if (VRegsIt != VMap.vregs_end())
+ return *VRegsIt->second;
- if (ValReg)
- return ValReg;
+ if (Val.getType()->isVoidTy())
+ return *VMap.getVRegs(Val);
+
+ // Create entry for this type.
+ auto *VRegs = VMap.getVRegs(Val);
+ auto *Offsets = VMap.getOffsets(Val);
- // Fill ValRegsSequence with the sequence of registers
- // we need to concat together to produce the value.
assert(Val.getType()->isSized() &&
"Don't know how to create an empty vreg");
- unsigned VReg =
- MRI->createGenericVirtualRegister(getLLTForType(*Val.getType(), *DL));
- ValReg = VReg;
- if (auto CV = dyn_cast<Constant>(&Val)) {
- bool Success = translate(*CV, VReg);
+ SmallVector<LLT, 4> SplitTys;
+ computeValueLLTs(*DL, *Val.getType(), SplitTys,
+ Offsets->empty() ? Offsets : nullptr);
+
+ if (!isa<Constant>(Val)) {
+ for (auto Ty : SplitTys)
+ VRegs->push_back(MRI->createGenericVirtualRegister(Ty));
+ return *VRegs;
+ }
+
+ if (Val.getType()->isAggregateType()) {
+ // UndefValue, ConstantAggregateZero
+ auto &C = cast<Constant>(Val);
+ unsigned Idx = 0;
+ while (auto Elt = C.getAggregateElement(Idx++)) {
+ auto EltRegs = getOrCreateVRegs(*Elt);
+ std::copy(EltRegs.begin(), EltRegs.end(), std::back_inserter(*VRegs));
+ }
+ } else {
+ assert(SplitTys.size() == 1 && "unexpectedly split LLT");
+ VRegs->push_back(MRI->createGenericVirtualRegister(SplitTys[0]));
+ bool Success = translate(cast<Constant>(Val), VRegs->front());
if (!Success) {
OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
MF->getFunction().getSubprogram(),
&MF->getFunction().getEntryBlock());
R << "unable to translate constant: " << ore::NV("Type", Val.getType());
reportTranslationError(*MF, *TPC, *ORE, R);
- return VReg;
+ return *VRegs;
}
}
- return VReg;
+ return *VRegs;
}
int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) {
@@ -164,6 +231,20 @@ unsigned IRTranslator::getMemOpAlignment(const Instruction &I) {
} else if (const LoadInst *LI = dyn_cast<LoadInst>(&I)) {
Alignment = LI->getAlignment();
ValTy = LI->getType();
+ } else if (const AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(&I)) {
+ // TODO(PR27168): This instruction has no alignment attribute, but unlike
+ // the default alignment for load/store, the default here is to assume
+ // it has NATURAL alignment, not DataLayout-specified alignment.
+ const DataLayout &DL = AI->getModule()->getDataLayout();
+ Alignment = DL.getTypeStoreSize(AI->getCompareOperand()->getType());
+ ValTy = AI->getCompareOperand()->getType();
+ } else if (const AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(&I)) {
+ // TODO(PR27168): This instruction has no alignment attribute, but unlike
+ // the default alignment for load/store, the default here is to assume
+ // it has NATURAL alignment, not DataLayout-specified alignment.
+ const DataLayout &DL = AI->getModule()->getDataLayout();
+ Alignment = DL.getTypeStoreSize(AI->getValOperand()->getType());
+ ValTy = AI->getType();
} else {
OptimizationRemarkMissed R("gisel-irtranslator", "", &I);
R << "unable to translate memop: " << ore::NV("Opcode", &I);
@@ -243,7 +324,11 @@ bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {
// The target may mess up with the insertion point, but
// this is not important as a return is the last instruction
// of the block anyway.
- return CLI->lowerReturn(MIRBuilder, Ret, !Ret ? 0 : getOrCreateVReg(*Ret));
+
+ // FIXME: this interface should simplify when CallLowering gets adapted to
+ // multiple VRegs per Value.
+ unsigned VReg = Ret ? packRegs(*Ret, MIRBuilder) : 0;
+ return CLI->lowerReturn(MIRBuilder, Ret, VReg);
}
bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
@@ -342,15 +427,23 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
if (DL->getTypeStoreSize(LI.getType()) == 0)
return true;
- unsigned Res = getOrCreateVReg(LI);
- unsigned Addr = getOrCreateVReg(*LI.getPointerOperand());
+ ArrayRef<unsigned> Regs = getOrCreateVRegs(LI);
+ ArrayRef<uint64_t> Offsets = *VMap.getOffsets(LI);
+ unsigned Base = getOrCreateVReg(*LI.getPointerOperand());
+
+ for (unsigned i = 0; i < Regs.size(); ++i) {
+ unsigned Addr = 0;
+ MIRBuilder.materializeGEP(Addr, Base, LLT::scalar(64), Offsets[i] / 8);
+
+ MachinePointerInfo Ptr(LI.getPointerOperand(), Offsets[i] / 8);
+ unsigned BaseAlign = getMemOpAlignment(LI);
+ auto MMO = MF->getMachineMemOperand(
+ Ptr, Flags, (MRI->getType(Regs[i]).getSizeInBits() + 7) / 8,
+ MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), nullptr,
+ LI.getSyncScopeID(), LI.getOrdering());
+ MIRBuilder.buildLoad(Regs[i], Addr, *MMO);
+ }
- MIRBuilder.buildLoad(
- Res, Addr,
- *MF->getMachineMemOperand(MachinePointerInfo(LI.getPointerOperand()),
- Flags, DL->getTypeStoreSize(LI.getType()),
- getMemOpAlignment(LI), AAMDNodes(), nullptr,
- LI.getSyncScopeID(), LI.getOrdering()));
return true;
}
@@ -363,50 +456,61 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
if (DL->getTypeStoreSize(SI.getValueOperand()->getType()) == 0)
return true;
- unsigned Val = getOrCreateVReg(*SI.getValueOperand());
- unsigned Addr = getOrCreateVReg(*SI.getPointerOperand());
-
- MIRBuilder.buildStore(
- Val, Addr,
- *MF->getMachineMemOperand(
- MachinePointerInfo(SI.getPointerOperand()), Flags,
- DL->getTypeStoreSize(SI.getValueOperand()->getType()),
- getMemOpAlignment(SI), AAMDNodes(), nullptr, SI.getSyncScopeID(),
- SI.getOrdering()));
+ ArrayRef<unsigned> Vals = getOrCreateVRegs(*SI.getValueOperand());
+ ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*SI.getValueOperand());
+ unsigned Base = getOrCreateVReg(*SI.getPointerOperand());
+
+ for (unsigned i = 0; i < Vals.size(); ++i) {
+ unsigned Addr = 0;
+ MIRBuilder.materializeGEP(Addr, Base, LLT::scalar(64), Offsets[i] / 8);
+
+ MachinePointerInfo Ptr(SI.getPointerOperand(), Offsets[i] / 8);
+ unsigned BaseAlign = getMemOpAlignment(SI);
+ auto MMO = MF->getMachineMemOperand(
+ Ptr, Flags, (MRI->getType(Vals[i]).getSizeInBits() + 7) / 8,
+ MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), nullptr,
+ SI.getSyncScopeID(), SI.getOrdering());
+ MIRBuilder.buildStore(Vals[i], Addr, *MMO);
+ }
return true;
}
-bool IRTranslator::translateExtractValue(const User &U,
- MachineIRBuilder &MIRBuilder) {
+static uint64_t getOffsetFromIndices(const User &U, const DataLayout &DL) {
const Value *Src = U.getOperand(0);
Type *Int32Ty = Type::getInt32Ty(U.getContext());
- SmallVector<Value *, 1> Indices;
-
- // If Src is a single element ConstantStruct, translate extractvalue
- // to that element to avoid inserting a cast instruction.
- if (auto CS = dyn_cast<ConstantStruct>(Src))
- if (CS->getNumOperands() == 1) {
- unsigned Res = getOrCreateVReg(*CS->getOperand(0));
- ValToVReg[&U] = Res;
- return true;
- }
// getIndexedOffsetInType is designed for GEPs, so the first index is the
// usual array element rather than looking into the actual aggregate.
+ SmallVector<Value *, 1> Indices;
Indices.push_back(ConstantInt::get(Int32Ty, 0));
if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(&U)) {
for (auto Idx : EVI->indices())
Indices.push_back(ConstantInt::get(Int32Ty, Idx));
+ } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(&U)) {
+ for (auto Idx : IVI->indices())
+ Indices.push_back(ConstantInt::get(Int32Ty, Idx));
} else {
for (unsigned i = 1; i < U.getNumOperands(); ++i)
Indices.push_back(U.getOperand(i));
}
- uint64_t Offset = 8 * DL->getIndexedOffsetInType(Src->getType(), Indices);
+ return 8 * static_cast<uint64_t>(
+ DL.getIndexedOffsetInType(Src->getType(), Indices));
+}
- unsigned Res = getOrCreateVReg(U);
- MIRBuilder.buildExtract(Res, getOrCreateVReg(*Src), Offset);
+bool IRTranslator::translateExtractValue(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ const Value *Src = U.getOperand(0);
+ uint64_t Offset = getOffsetFromIndices(U, *DL);
+ ArrayRef<unsigned> SrcRegs = getOrCreateVRegs(*Src);
+ ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*Src);
+ unsigned Idx = std::lower_bound(Offsets.begin(), Offsets.end(), Offset) -
+ Offsets.begin();
+ auto &DstRegs = allocateVRegs(U);
+
+ for (unsigned i = 0; i < DstRegs.size(); ++i)
+ DstRegs[i] = SrcRegs[Idx++];
return true;
}
@@ -414,37 +518,33 @@ bool IRTranslator::translateExtractValue(const User &U,
bool IRTranslator::translateInsertValue(const User &U,
MachineIRBuilder &MIRBuilder) {
const Value *Src = U.getOperand(0);
- Type *Int32Ty = Type::getInt32Ty(U.getContext());
- SmallVector<Value *, 1> Indices;
-
- // getIndexedOffsetInType is designed for GEPs, so the first index is the
- // usual array element rather than looking into the actual aggregate.
- Indices.push_back(ConstantInt::get(Int32Ty, 0));
-
- if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(&U)) {
- for (auto Idx : IVI->indices())
- Indices.push_back(ConstantInt::get(Int32Ty, Idx));
- } else {
- for (unsigned i = 2; i < U.getNumOperands(); ++i)
- Indices.push_back(U.getOperand(i));
+ uint64_t Offset = getOffsetFromIndices(U, *DL);
+ auto &DstRegs = allocateVRegs(U);
+ ArrayRef<uint64_t> DstOffsets = *VMap.getOffsets(U);
+ ArrayRef<unsigned> SrcRegs = getOrCreateVRegs(*Src);
+ ArrayRef<unsigned> InsertedRegs = getOrCreateVRegs(*U.getOperand(1));
+ auto InsertedIt = InsertedRegs.begin();
+
+ for (unsigned i = 0; i < DstRegs.size(); ++i) {
+ if (DstOffsets[i] >= Offset && InsertedIt != InsertedRegs.end())
+ DstRegs[i] = *InsertedIt++;
+ else
+ DstRegs[i] = SrcRegs[i];
}
- uint64_t Offset = 8 * DL->getIndexedOffsetInType(Src->getType(), Indices);
-
- unsigned Res = getOrCreateVReg(U);
- unsigned Inserted = getOrCreateVReg(*U.getOperand(1));
- MIRBuilder.buildInsert(Res, getOrCreateVReg(*Src), Inserted, Offset);
-
return true;
}
bool IRTranslator::translateSelect(const User &U,
MachineIRBuilder &MIRBuilder) {
- unsigned Res = getOrCreateVReg(U);
unsigned Tst = getOrCreateVReg(*U.getOperand(0));
- unsigned Op0 = getOrCreateVReg(*U.getOperand(1));
- unsigned Op1 = getOrCreateVReg(*U.getOperand(2));
- MIRBuilder.buildSelect(Res, Tst, Op0, Op1);
+ ArrayRef<unsigned> ResRegs = getOrCreateVRegs(U);
+ ArrayRef<unsigned> Op0Regs = getOrCreateVRegs(*U.getOperand(1));
+ ArrayRef<unsigned> Op1Regs = getOrCreateVRegs(*U.getOperand(2));
+
+ for (unsigned i = 0; i < ResRegs.size(); ++i)
+ MIRBuilder.buildSelect(ResRegs[i], Tst, Op0Regs[i], Op1Regs[i]);
+
return true;
}
@@ -453,15 +553,16 @@ bool IRTranslator::translateBitCast(const User &U,
// If we're bitcasting to the source type, we can reuse the source vreg.
if (getLLTForType(*U.getOperand(0)->getType(), *DL) ==
getLLTForType(*U.getType(), *DL)) {
- // Get the source vreg now, to avoid invalidating ValToVReg.
unsigned SrcReg = getOrCreateVReg(*U.getOperand(0));
- unsigned &Reg = ValToVReg[&U];
+ auto &Regs = *VMap.getVRegs(U);
// If we already assigned a vreg for this bitcast, we can't change that.
// Emit a copy to satisfy the users we already emitted.
- if (Reg)
- MIRBuilder.buildCopy(Reg, SrcReg);
- else
- Reg = SrcReg;
+ if (!Regs.empty())
+ MIRBuilder.buildCopy(Regs[0], SrcReg);
+ else {
+ Regs.push_back(SrcReg);
+ VMap.getOffsets(U)->push_back(0);
+ }
return true;
}
return translateCast(TargetOpcode::G_BITCAST, U, MIRBuilder);
@@ -516,10 +617,6 @@ bool IRTranslator::translateGetElementPtr(const User &U,
Offset = 0;
}
- // N = N + Idx * ElementSize;
- unsigned ElementSizeReg =
- getOrCreateVReg(*ConstantInt::get(OffsetIRTy, ElementSize));
-
unsigned IdxReg = getOrCreateVReg(*Idx);
if (MRI->getType(IdxReg) != OffsetTy) {
unsigned NewIdxReg = MRI->createGenericVirtualRegister(OffsetTy);
@@ -527,11 +624,20 @@ bool IRTranslator::translateGetElementPtr(const User &U,
IdxReg = NewIdxReg;
}
- unsigned OffsetReg = MRI->createGenericVirtualRegister(OffsetTy);
- MIRBuilder.buildMul(OffsetReg, ElementSizeReg, IdxReg);
+ // N = N + Idx * ElementSize;
+ // Avoid doing it for ElementSize of 1.
+ unsigned GepOffsetReg;
+ if (ElementSize != 1) {
+ unsigned ElementSizeReg =
+ getOrCreateVReg(*ConstantInt::get(OffsetIRTy, ElementSize));
+
+ GepOffsetReg = MRI->createGenericVirtualRegister(OffsetTy);
+ MIRBuilder.buildMul(GepOffsetReg, ElementSizeReg, IdxReg);
+ } else
+ GepOffsetReg = IdxReg;
unsigned NewBaseReg = MRI->createGenericVirtualRegister(PtrTy);
- MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetReg);
+ MIRBuilder.buildGEP(NewBaseReg, BaseReg, GepOffsetReg);
BaseReg = NewBaseReg;
}
}
@@ -607,14 +713,10 @@ void IRTranslator::getStackGuard(unsigned DstReg,
bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
MachineIRBuilder &MIRBuilder) {
- LLT Ty = getLLTForType(*CI.getOperand(0)->getType(), *DL);
- LLT s1 = LLT::scalar(1);
- unsigned Width = Ty.getSizeInBits();
- unsigned Res = MRI->createGenericVirtualRegister(Ty);
- unsigned Overflow = MRI->createGenericVirtualRegister(s1);
+ ArrayRef<unsigned> ResRegs = getOrCreateVRegs(CI);
auto MIB = MIRBuilder.buildInstr(Op)
- .addDef(Res)
- .addDef(Overflow)
+ .addDef(ResRegs[0])
+ .addDef(ResRegs[1])
.addUse(getOrCreateVReg(*CI.getOperand(0)))
.addUse(getOrCreateVReg(*CI.getOperand(1)));
@@ -624,7 +726,6 @@ bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
MIB.addUse(Zero);
}
- MIRBuilder.buildSequence(getOrCreateVReg(CI), {Res, Overflow}, {0, Width});
return true;
}
@@ -647,7 +748,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
const Value *Address = DI.getAddress();
if (!Address || isa<UndefValue>(Address)) {
- DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
return true;
}
@@ -741,6 +842,11 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
.addDef(getOrCreateVReg(CI))
.addUse(getOrCreateVReg(*CI.getArgOperand(0)));
return true;
+ case Intrinsic::fabs:
+ MIRBuilder.buildInstr(TargetOpcode::G_FABS)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
case Intrinsic::fma:
MIRBuilder.buildInstr(TargetOpcode::G_FMA)
.addDef(getOrCreateVReg(CI))
@@ -748,6 +854,25 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
.addUse(getOrCreateVReg(*CI.getArgOperand(1)))
.addUse(getOrCreateVReg(*CI.getArgOperand(2)));
return true;
+ case Intrinsic::fmuladd: {
+ const TargetMachine &TM = MF->getTarget();
+ const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
+ unsigned Dst = getOrCreateVReg(CI);
+ unsigned Op0 = getOrCreateVReg(*CI.getArgOperand(0));
+ unsigned Op1 = getOrCreateVReg(*CI.getArgOperand(1));
+ unsigned Op2 = getOrCreateVReg(*CI.getArgOperand(2));
+ if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
+ TLI.isFMAFasterThanFMulAndFAdd(TLI.getValueType(*DL, CI.getType()))) {
+ // TODO: Revisit this to see if we should move this part of the
+ // lowering to the combiner.
+ MIRBuilder.buildInstr(TargetOpcode::G_FMA, Dst, Op0, Op1, Op2);
+ } else {
+ LLT Ty = getLLTForType(*CI.getType(), *DL);
+ auto FMul = MIRBuilder.buildInstr(TargetOpcode::G_FMUL, Ty, Op0, Op1);
+ MIRBuilder.buildInstr(TargetOpcode::G_FADD, Dst, FMul, Op2);
+ }
+ return true;
+ }
case Intrinsic::memcpy:
case Intrinsic::memmove:
case Intrinsic::memset:
@@ -807,6 +932,34 @@ bool IRTranslator::translateInlineAsm(const CallInst &CI,
return true;
}
+unsigned IRTranslator::packRegs(const Value &V,
+ MachineIRBuilder &MIRBuilder) {
+ ArrayRef<unsigned> Regs = getOrCreateVRegs(V);
+ ArrayRef<uint64_t> Offsets = *VMap.getOffsets(V);
+ LLT BigTy = getLLTForType(*V.getType(), *DL);
+
+ if (Regs.size() == 1)
+ return Regs[0];
+
+ unsigned Dst = MRI->createGenericVirtualRegister(BigTy);
+ MIRBuilder.buildUndef(Dst);
+ for (unsigned i = 0; i < Regs.size(); ++i) {
+ unsigned NewDst = MRI->createGenericVirtualRegister(BigTy);
+ MIRBuilder.buildInsert(NewDst, Dst, Regs[i], Offsets[i]);
+ Dst = NewDst;
+ }
+ return Dst;
+}
+
+void IRTranslator::unpackRegs(const Value &V, unsigned Src,
+ MachineIRBuilder &MIRBuilder) {
+ ArrayRef<unsigned> Regs = getOrCreateVRegs(V);
+ ArrayRef<uint64_t> Offsets = *VMap.getOffsets(V);
+
+ for (unsigned i = 0; i < Regs.size(); ++i)
+ MIRBuilder.buildExtract(Regs[i], Src, Offsets[i]);
+}
+
bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
const CallInst &CI = cast<CallInst>(U);
auto TII = MF->getTarget().getIntrinsicInfo();
@@ -826,16 +979,24 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F));
}
+ bool IsSplitType = valueIsSplit(CI);
if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic) {
- unsigned Res = CI.getType()->isVoidTy() ? 0 : getOrCreateVReg(CI);
+ unsigned Res = IsSplitType ? MRI->createGenericVirtualRegister(
+ getLLTForType(*CI.getType(), *DL))
+ : getOrCreateVReg(CI);
+
SmallVector<unsigned, 8> Args;
for (auto &Arg: CI.arg_operands())
- Args.push_back(getOrCreateVReg(*Arg));
+ Args.push_back(packRegs(*Arg, MIRBuilder));
MF->getFrameInfo().setHasCalls(true);
- return CLI->lowerCall(MIRBuilder, &CI, Res, Args, [&]() {
+ bool Success = CLI->lowerCall(MIRBuilder, &CI, Res, Args, [&]() {
return getOrCreateVReg(*CI.getCalledValue());
});
+
+ if (IsSplitType)
+ unpackRegs(CI, Res, MIRBuilder);
+ return Success;
}
assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic");
@@ -843,7 +1004,14 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
if (translateKnownIntrinsic(CI, ID, MIRBuilder))
return true;
- unsigned Res = CI.getType()->isVoidTy() ? 0 : getOrCreateVReg(CI);
+ unsigned Res = 0;
+ if (!CI.getType()->isVoidTy()) {
+ if (IsSplitType)
+ Res =
+ MRI->createGenericVirtualRegister(getLLTForType(*CI.getType(), *DL));
+ else
+ Res = getOrCreateVReg(CI);
+ }
MachineInstrBuilder MIB =
MIRBuilder.buildIntrinsic(ID, Res, !CI.doesNotAccessMemory());
@@ -851,9 +1019,12 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
// Some intrinsics take metadata parameters. Reject them.
if (isa<MetadataAsValue>(Arg))
return false;
- MIB.addUse(getOrCreateVReg(*Arg));
+ MIB.addUse(packRegs(*Arg, MIRBuilder));
}
+ if (IsSplitType)
+ unpackRegs(CI, Res, MIRBuilder);
+
// Add a MachineMemOperand if it is a target mem intrinsic.
const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
TargetLowering::IntrinsicInfo Info;
@@ -897,15 +1068,18 @@ bool IRTranslator::translateInvoke(const User &U,
MCSymbol *BeginSymbol = Context.createTempSymbol();
MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol);
- unsigned Res = I.getType()->isVoidTy() ? 0 : getOrCreateVReg(I);
+ unsigned Res =
+ MRI->createGenericVirtualRegister(getLLTForType(*I.getType(), *DL));
SmallVector<unsigned, 8> Args;
for (auto &Arg: I.arg_operands())
- Args.push_back(getOrCreateVReg(*Arg));
+ Args.push_back(packRegs(*Arg, MIRBuilder));
if (!CLI->lowerCall(MIRBuilder, &I, Res, Args,
[&]() { return getOrCreateVReg(*I.getCalledValue()); }))
return false;
+ unpackRegs(I, Res, MIRBuilder);
+
MCSymbol *EndSymbol = Context.createTempSymbol();
MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol);
@@ -964,27 +1138,18 @@ bool IRTranslator::translateLandingPad(const User &U,
return false;
MBB.addLiveIn(ExceptionReg);
- unsigned VReg = MRI->createGenericVirtualRegister(Tys[0]),
- Tmp = MRI->createGenericVirtualRegister(Ty);
- MIRBuilder.buildCopy(VReg, ExceptionReg);
- MIRBuilder.buildInsert(Tmp, Undef, VReg, 0);
+ ArrayRef<unsigned> ResRegs = getOrCreateVRegs(LP);
+ MIRBuilder.buildCopy(ResRegs[0], ExceptionReg);
unsigned SelectorReg = TLI.getExceptionSelectorRegister(PersonalityFn);
if (!SelectorReg)
return false;
MBB.addLiveIn(SelectorReg);
-
- // N.b. the exception selector register always has pointer type and may not
- // match the actual IR-level type in the landingpad so an extra cast is
- // needed.
unsigned PtrVReg = MRI->createGenericVirtualRegister(Tys[0]);
MIRBuilder.buildCopy(PtrVReg, SelectorReg);
+ MIRBuilder.buildCast(ResRegs[1], PtrVReg);
- VReg = MRI->createGenericVirtualRegister(Tys[1]);
- MIRBuilder.buildInstr(TargetOpcode::G_PTRTOINT).addDef(VReg).addUse(PtrVReg);
- MIRBuilder.buildInsert(getOrCreateVReg(LP), Tmp, VReg,
- Tys[0].getSizeInBits());
return true;
}
@@ -992,6 +1157,9 @@ bool IRTranslator::translateAlloca(const User &U,
MachineIRBuilder &MIRBuilder) {
auto &AI = cast<AllocaInst>(U);
+ if (AI.isSwiftError())
+ return false;
+
if (AI.isStaticAlloca()) {
unsigned Res = getOrCreateVReg(AI);
int FI = getOrCreateFrameIndex(AI);
@@ -999,6 +1167,10 @@ bool IRTranslator::translateAlloca(const User &U,
return true;
}
+ // FIXME: support stack probing for Windows.
+ if (MF->getTarget().getTargetTriple().isOSWindows())
+ return false;
+
// Now we're in the harder dynamic case.
Type *Ty = AI.getAllocatedType();
unsigned Align =
@@ -1070,9 +1242,16 @@ bool IRTranslator::translateInsertElement(const User &U,
// not a legal vector type in LLT.
if (U.getType()->getVectorNumElements() == 1) {
unsigned Elt = getOrCreateVReg(*U.getOperand(1));
- ValToVReg[&U] = Elt;
+ auto &Regs = *VMap.getVRegs(U);
+ if (Regs.empty()) {
+ Regs.push_back(Elt);
+ VMap.getOffsets(U)->push_back(0);
+ } else {
+ MIRBuilder.buildCopy(Regs[0], Elt);
+ }
return true;
}
+
unsigned Res = getOrCreateVReg(U);
unsigned Val = getOrCreateVReg(*U.getOperand(0));
unsigned Elt = getOrCreateVReg(*U.getOperand(1));
@@ -1087,7 +1266,13 @@ bool IRTranslator::translateExtractElement(const User &U,
// not a legal vector type in LLT.
if (U.getOperand(0)->getType()->getVectorNumElements() == 1) {
unsigned Elt = getOrCreateVReg(*U.getOperand(0));
- ValToVReg[&U] = Elt;
+ auto &Regs = *VMap.getVRegs(U);
+ if (Regs.empty()) {
+ Regs.push_back(Elt);
+ VMap.getOffsets(U)->push_back(0);
+ } else {
+ MIRBuilder.buildCopy(Regs[0], Elt);
+ }
return true;
}
unsigned Res = getOrCreateVReg(U);
@@ -1109,17 +1294,115 @@ bool IRTranslator::translateShuffleVector(const User &U,
bool IRTranslator::translatePHI(const User &U, MachineIRBuilder &MIRBuilder) {
const PHINode &PI = cast<PHINode>(U);
- auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
- MIB.addDef(getOrCreateVReg(PI));
- PendingPHIs.emplace_back(&PI, MIB.getInstr());
+ SmallVector<MachineInstr *, 4> Insts;
+ for (auto Reg : getOrCreateVRegs(PI)) {
+ auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI, Reg);
+ Insts.push_back(MIB.getInstr());
+ }
+
+ PendingPHIs.emplace_back(&PI, std::move(Insts));
+ return true;
+}
+
+bool IRTranslator::translateAtomicCmpXchg(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ const AtomicCmpXchgInst &I = cast<AtomicCmpXchgInst>(U);
+
+ if (I.isWeak())
+ return false;
+
+ auto Flags = I.isVolatile() ? MachineMemOperand::MOVolatile
+ : MachineMemOperand::MONone;
+ Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+
+ Type *ResType = I.getType();
+ Type *ValType = ResType->Type::getStructElementType(0);
+
+ auto Res = getOrCreateVRegs(I);
+ unsigned OldValRes = Res[0];
+ unsigned SuccessRes = Res[1];
+ unsigned Addr = getOrCreateVReg(*I.getPointerOperand());
+ unsigned Cmp = getOrCreateVReg(*I.getCompareOperand());
+ unsigned NewVal = getOrCreateVReg(*I.getNewValOperand());
+
+ MIRBuilder.buildAtomicCmpXchgWithSuccess(
+ OldValRes, SuccessRes, Addr, Cmp, NewVal,
+ *MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
+ Flags, DL->getTypeStoreSize(ValType),
+ getMemOpAlignment(I), AAMDNodes(), nullptr,
+ I.getSyncScopeID(), I.getSuccessOrdering(),
+ I.getFailureOrdering()));
+ return true;
+}
+
+bool IRTranslator::translateAtomicRMW(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ const AtomicRMWInst &I = cast<AtomicRMWInst>(U);
+
+ auto Flags = I.isVolatile() ? MachineMemOperand::MOVolatile
+ : MachineMemOperand::MONone;
+ Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+
+ Type *ResType = I.getType();
+
+ unsigned Res = getOrCreateVReg(I);
+ unsigned Addr = getOrCreateVReg(*I.getPointerOperand());
+ unsigned Val = getOrCreateVReg(*I.getValOperand());
+
+ unsigned Opcode = 0;
+ switch (I.getOperation()) {
+ default:
+ llvm_unreachable("Unknown atomicrmw op");
+ return false;
+ case AtomicRMWInst::Xchg:
+ Opcode = TargetOpcode::G_ATOMICRMW_XCHG;
+ break;
+ case AtomicRMWInst::Add:
+ Opcode = TargetOpcode::G_ATOMICRMW_ADD;
+ break;
+ case AtomicRMWInst::Sub:
+ Opcode = TargetOpcode::G_ATOMICRMW_SUB;
+ break;
+ case AtomicRMWInst::And:
+ Opcode = TargetOpcode::G_ATOMICRMW_AND;
+ break;
+ case AtomicRMWInst::Nand:
+ Opcode = TargetOpcode::G_ATOMICRMW_NAND;
+ break;
+ case AtomicRMWInst::Or:
+ Opcode = TargetOpcode::G_ATOMICRMW_OR;
+ break;
+ case AtomicRMWInst::Xor:
+ Opcode = TargetOpcode::G_ATOMICRMW_XOR;
+ break;
+ case AtomicRMWInst::Max:
+ Opcode = TargetOpcode::G_ATOMICRMW_MAX;
+ break;
+ case AtomicRMWInst::Min:
+ Opcode = TargetOpcode::G_ATOMICRMW_MIN;
+ break;
+ case AtomicRMWInst::UMax:
+ Opcode = TargetOpcode::G_ATOMICRMW_UMAX;
+ break;
+ case AtomicRMWInst::UMin:
+ Opcode = TargetOpcode::G_ATOMICRMW_UMIN;
+ break;
+ }
+
+ MIRBuilder.buildAtomicRMW(
+ Opcode, Res, Addr, Val,
+ *MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
+ Flags, DL->getTypeStoreSize(ResType),
+ getMemOpAlignment(I), AAMDNodes(), nullptr,
+ I.getSyncScopeID(), I.getOrdering()));
return true;
}
void IRTranslator::finishPendingPhis() {
- for (std::pair<const PHINode *, MachineInstr *> &Phi : PendingPHIs) {
+ for (auto &Phi : PendingPHIs) {
const PHINode *PI = Phi.first;
- MachineInstrBuilder MIB(*MF, Phi.second);
+ ArrayRef<MachineInstr *> ComponentPHIs = Phi.second;
// All MachineBasicBlocks exist, add them to the PHI. We assume IRTranslator
// won't create extra control flow here, otherwise we need to find the
@@ -1133,17 +1416,27 @@ void IRTranslator::finishPendingPhis() {
continue;
HandledPreds.insert(IRPred);
- unsigned ValReg = getOrCreateVReg(*PI->getIncomingValue(i));
+ ArrayRef<unsigned> ValRegs = getOrCreateVRegs(*PI->getIncomingValue(i));
for (auto Pred : getMachinePredBBs({IRPred, PI->getParent()})) {
- assert(Pred->isSuccessor(MIB->getParent()) &&
+ assert(Pred->isSuccessor(ComponentPHIs[0]->getParent()) &&
"incorrect CFG at MachineBasicBlock level");
- MIB.addUse(ValReg);
- MIB.addMBB(Pred);
+ for (unsigned j = 0; j < ValRegs.size(); ++j) {
+ MachineInstrBuilder MIB(*MF, ComponentPHIs[j]);
+ MIB.addUse(ValRegs[j]);
+ MIB.addMBB(Pred);
+ }
}
}
}
}
+bool IRTranslator::valueIsSplit(const Value &V,
+ SmallVectorImpl<uint64_t> *Offsets) {
+ SmallVector<LLT, 4> SplitTys;
+ computeValueLLTs(*DL, *V.getType(), SplitTys, Offsets);
+ return SplitTys.size() > 1;
+}
+
bool IRTranslator::translate(const Instruction &Inst) {
CurBuilder.setDebugLoc(Inst.getDebugLoc());
switch(Inst.getOpcode()) {
@@ -1162,9 +1455,15 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {
EntryBuilder.buildFConstant(Reg, *CF);
else if (isa<UndefValue>(C))
EntryBuilder.buildUndef(Reg);
- else if (isa<ConstantPointerNull>(C))
- EntryBuilder.buildConstant(Reg, 0);
- else if (auto GV = dyn_cast<GlobalValue>(&C))
+ else if (isa<ConstantPointerNull>(C)) {
+ // As we are building a constant value of 0 into a pointer,
+ // insert a cast so the types match up.
+ unsigned NullSize = DL->getTypeSizeInBits(C.getType());
+ auto *ZeroTy = Type::getIntNTy(C.getContext(), NullSize);
+ auto *ZeroVal = ConstantInt::get(ZeroTy, 0);
+ unsigned ZeroReg = getOrCreateVReg(*ZeroVal);
+ EntryBuilder.buildCast(Reg, ZeroReg);
+ } else if (auto GV = dyn_cast<GlobalValue>(&C))
EntryBuilder.buildGlobalValue(Reg, GV);
else if (auto CAZ = dyn_cast<ConstantAggregateZero>(&C)) {
if (!CAZ->getType()->isVectorTy())
@@ -1196,23 +1495,6 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {
default:
return false;
}
- } else if (auto CS = dyn_cast<ConstantStruct>(&C)) {
- // Return the element if it is a single element ConstantStruct.
- if (CS->getNumOperands() == 1) {
- unsigned EltReg = getOrCreateVReg(*CS->getOperand(0));
- EntryBuilder.buildCast(Reg, EltReg);
- return true;
- }
- SmallVector<unsigned, 4> Ops;
- SmallVector<uint64_t, 4> Indices;
- uint64_t Offset = 0;
- for (unsigned i = 0; i < CS->getNumOperands(); ++i) {
- unsigned OpReg = getOrCreateVReg(*CS->getOperand(i));
- Ops.push_back(OpReg);
- Indices.push_back(Offset);
- Offset += MRI->getType(OpReg).getSizeInBits();
- }
- EntryBuilder.buildSequence(Reg, Ops, Indices);
} else if (auto CV = dyn_cast<ConstantVector>(&C)) {
if (CV->getNumOperands() == 1)
return translate(*CV->getOperand(0), Reg);
@@ -1231,7 +1513,7 @@ void IRTranslator::finalizeFunction() {
// Release the memory used by the different maps we
// needed during the translation.
PendingPHIs.clear();
- ValToVReg.clear();
+ VMap.reset();
FrameIndices.clear();
MachinePreds.clear();
// MachineIRBuilder::DebugLoc can outlive the DILocation it holds. Clear it
@@ -1291,8 +1573,22 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
for (const Argument &Arg: F.args()) {
if (DL->getTypeStoreSize(Arg.getType()) == 0)
continue; // Don't handle zero sized types.
- VRegArgs.push_back(getOrCreateVReg(Arg));
+ VRegArgs.push_back(
+ MRI->createGenericVirtualRegister(getLLTForType(*Arg.getType(), *DL)));
}
+
+ // We don't currently support translating swifterror or swiftself functions.
+ for (auto &Arg : F.args()) {
+ if (Arg.hasSwiftErrorAttr() || Arg.hasSwiftSelfAttr()) {
+ OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
+ F.getSubprogram(), &F.getEntryBlock());
+ R << "unable to lower arguments due to swifterror/swiftself: "
+ << ore::NV("Prototype", F.getType());
+ reportTranslationError(*MF, *TPC, *ORE, R);
+ return false;
+ }
+ }
+
if (!CLI->lowerFormalArguments(EntryBuilder, F, VRegArgs)) {
OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
F.getSubprogram(), &F.getEntryBlock());
@@ -1301,14 +1597,28 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
return false;
}
+ auto ArgIt = F.arg_begin();
+ for (auto &VArg : VRegArgs) {
+ // If the argument is an unsplit scalar then don't use unpackRegs to avoid
+ // creating redundant copies.
+ if (!valueIsSplit(*ArgIt, VMap.getOffsets(*ArgIt))) {
+ auto &VRegs = *VMap.getVRegs(cast<Value>(*ArgIt));
+ assert(VRegs.empty() && "VRegs already populated?");
+ VRegs.push_back(VArg);
+ } else {
+ unpackRegs(*ArgIt, VArg, EntryBuilder);
+ }
+ ArgIt++;
+ }
+
// And translate the function!
- for (const BasicBlock &BB: F) {
+ for (const BasicBlock &BB : F) {
MachineBasicBlock &MBB = getMBB(BB);
// Set the insertion point of all the following translations to
// the end of this basic block.
CurBuilder.setMBB(MBB);
- for (const Instruction &Inst: BB) {
+ for (const Instruction &Inst : BB) {
if (translate(Inst))
continue;
@@ -1358,5 +1668,9 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
assert(&MF->front() == &NewEntryBB &&
"New entry wasn't next in the list of basic block!");
+ // Initialize stack protector information.
+ StackProtector &SP = getAnalysis<StackProtector>();
+ SP.copyToMachineFrameInfo(MF->getFrameInfo());
+
return false;
}
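Most of the IRTranslator changes above follow from one idea: a Value may now map to several vregs, one per leaf scalar of its type, with the bit offset of each leaf tracked alongside (computeValueLLTs), while packRegs/unpackRegs bridge to call-lowering interfaces that still expect a single register. A standalone sketch of the flattening, with a toy type tree instead of llvm::Type/LLT and a packed layout (no padding) assumed:

#include <cstdint>
#include <vector>

struct Ty {
  unsigned ScalarBits = 0;   // leaf size in bits; ignored for aggregates
  std::vector<Ty> Elems;     // struct/array members in layout order
};

static unsigned sizeInBits(const Ty &T) {
  if (T.Elems.empty())
    return T.ScalarBits;
  unsigned Bits = 0;
  for (const Ty &E : T.Elems)
    Bits += sizeInBits(E);   // packed layout; the real code uses StructLayout
  return Bits;
}

// One entry per leaf scalar: its size and its bit offset from the start of the
// whole value, mirroring the (vreg, offset) pairs kept per Value.
static void flatten(const Ty &T, uint64_t StartBits,
                    std::vector<unsigned> &LeafBits,
                    std::vector<uint64_t> &OffsetBits) {
  if (!T.Elems.empty()) {
    uint64_t Off = StartBits;
    for (const Ty &E : T.Elems) {
      flatten(E, Off, LeafBits, OffsetBits);
      Off += sizeInBits(E);
    }
    return;
  }
  LeafBits.push_back(T.ScalarBits);
  OffsetBits.push_back(StartBits);
}
// e.g. {i64, {i32, i32}} flattens to leaves {64, 32, 32} at offsets {0, 64, 96}.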
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
index 422cc2219aa8..c83c791327e4 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -12,7 +12,6 @@
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
@@ -57,23 +56,17 @@ InstructionSelect::InstructionSelect() : MachineFunctionPass(ID) {
void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetPassConfig>();
+ getSelectionDAGFallbackAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
}
bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
- const MachineRegisterInfo &MRI = MF.getRegInfo();
-
- // No matter what happens, whether we successfully select the function or not,
- // nothing is going to use the vreg types after us. Make sure they disappear.
- auto ClearVRegTypesOnReturn =
- make_scope_exit([&]() { MRI.getVRegToType().clear(); });
-
// If the ISel pipeline failed, do not bother running that pass.
if (MF.getProperties().hasProperty(
MachineFunctionProperties::Property::FailedISel))
return false;
- DEBUG(dbgs() << "Selecting function: " << MF.getName() << '\n');
+ LLVM_DEBUG(dbgs() << "Selecting function: " << MF.getName() << '\n');
const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
const InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector();
@@ -85,23 +78,18 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
// FIXME: There are many other MF/MFI fields we need to initialize.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
#ifndef NDEBUG
// Check that our input is fully legal: we require the function to have the
// Legalized property, so it should be.
- // FIXME: This should be in the MachineVerifier, but it can't use the
- // LegalizerInfo as it's currently in the separate GlobalISel library.
- // The RegBankSelected property is already checked in the verifier. Note
- // that it has the same layering problem, but we only use inline methods so
- // end up not needing to link against the GlobalISel library.
- if (const LegalizerInfo *MLI = MF.getSubtarget().getLegalizerInfo())
- for (MachineBasicBlock &MBB : MF)
- for (MachineInstr &MI : MBB)
- if (isPreISelGenericOpcode(MI.getOpcode()) && !MLI->isLegal(MI, MRI)) {
- reportGISelFailure(MF, TPC, MORE, "gisel-select",
- "instruction is not legal", MI);
- return false;
- }
-
+ // FIXME: This should be in the MachineVerifier, as the RegBankSelected
+ // property check already is.
+ if (!DisableGISelLegalityCheck)
+ if (const MachineInstr *MI = machineFunctionIsIllegal(MF)) {
+ reportGISelFailure(MF, TPC, MORE, "gisel-select",
+ "instruction is not legal", *MI);
+ return false;
+ }
#endif
// FIXME: We could introduce new blocks and will need to fix the outer loop.
// Until then, keep track of the number of blocks to assert that we don't.
@@ -129,12 +117,12 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
else
--MII;
- DEBUG(dbgs() << "Selecting: \n " << MI);
+ LLVM_DEBUG(dbgs() << "Selecting: \n " << MI);
// We could have folded this instruction away already, making it dead.
// If so, erase it.
if (isTriviallyDead(MI, MRI)) {
- DEBUG(dbgs() << "Is dead; erasing.\n");
+ LLVM_DEBUG(dbgs() << "Is dead; erasing.\n");
MI.eraseFromParentAndMarkDBGValuesForRemoval();
continue;
}
@@ -147,7 +135,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
}
// Dump the range of instructions that MI expanded into.
- DEBUG({
+ LLVM_DEBUG({
auto InsertedBegin = ReachedBegin ? MBB->begin() : std::next(MII);
dbgs() << "Into:\n";
for (auto &InsertedMI : make_range(InsertedBegin, AfterIt))
@@ -159,30 +147,63 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ for (MachineBasicBlock &MBB : MF) {
+ if (MBB.empty())
+ continue;
+
+ // Try to find redundant copies between vregs of the same register class.
+ bool ReachedBegin = false;
+ for (auto MII = std::prev(MBB.end()), Begin = MBB.begin(); !ReachedBegin;) {
+ // Examine this instruction.
+ MachineInstr &MI = *MII;
+
+ // And have our iterator point to the next instruction, if there is one.
+ if (MII == Begin)
+ ReachedBegin = true;
+ else
+ --MII;
+ if (MI.getOpcode() != TargetOpcode::COPY)
+ continue;
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ unsigned DstReg = MI.getOperand(0).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ TargetRegisterInfo::isVirtualRegister(DstReg)) {
+ auto SrcRC = MRI.getRegClass(SrcReg);
+ auto DstRC = MRI.getRegClass(DstReg);
+ if (SrcRC == DstRC) {
+ MRI.replaceRegWith(DstReg, SrcReg);
+ MI.eraseFromParentAndMarkDBGValuesForRemoval();
+ }
+ }
+ }
+ }
+
// Now that selection is complete, there are no more generic vregs. Verify
// that the size of the now-constrained vreg is unchanged and that it has a
// register class.
- for (auto &VRegToType : MRI.getVRegToType()) {
- unsigned VReg = VRegToType.first;
- auto *RC = MRI.getRegClassOrNull(VReg);
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
+ unsigned VReg = TargetRegisterInfo::index2VirtReg(I);
+
MachineInstr *MI = nullptr;
if (!MRI.def_empty(VReg))
MI = &*MRI.def_instr_begin(VReg);
else if (!MRI.use_empty(VReg))
MI = &*MRI.use_instr_begin(VReg);
+ if (!MI)
+ continue;
- if (MI && !RC) {
+ const TargetRegisterClass *RC = MRI.getRegClassOrNull(VReg);
+ if (!RC) {
reportGISelFailure(MF, TPC, MORE, "gisel-select",
"VReg has no regclass after selection", *MI);
return false;
- } else if (!RC)
- continue;
+ }
- if (VRegToType.second.isValid() &&
- VRegToType.second.getSizeInBits() > TRI.getRegSizeInBits(*RC)) {
- reportGISelFailure(MF, TPC, MORE, "gisel-select",
- "VReg has explicit size different from class size",
- *MI);
+ const LLT Ty = MRI.getType(VReg);
+ if (Ty.isValid() && Ty.getSizeInBits() > TRI.getRegSizeInBits(*RC)) {
+ reportGISelFailure(
+ MF, TPC, MORE, "gisel-select",
+ "VReg's low-level type and register class have different sizes", *MI);
return false;
}
}
@@ -199,6 +220,12 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
auto &TLI = *MF.getSubtarget().getTargetLowering();
TLI.finalizeLowering(MF);
+ LLVM_DEBUG({
+ dbgs() << "Rules covered by selecting function: " << MF.getName() << ":";
+ for (auto RuleID : CoverageInfo.covered())
+ dbgs() << " id" << RuleID;
+ dbgs() << "\n\n";
+ });
CoverageInfo.emit(CoveragePrefix,
MF.getSubtarget()
.getTargetLowering()
@@ -206,6 +233,11 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
.getTarget()
.getBackendName());
+  // If we successfully selected the function, nothing is going to use the vreg
+ // types after us (otherwise MIRPrinter would need them). Make sure the types
+ // disappear.
+ MRI.clearVirtRegTypes();
+
// FIXME: Should we accurately track changes?
return true;
}
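For readers skimming the hunk above, the newly added copy-cleanup loop boils down to the following self-contained sketch. It restates the same calls the patch itself uses (replaceRegWith, eraseFromParentAndMarkDBGValuesForRemoval); the helper name is made up for illustration and this is not part of the pass.

// Sketch: fold `%dst = COPY %src` when both vregs already share a register class.
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
using namespace llvm;

static bool foldTrivialCopy(MachineInstr &MI, MachineRegisterInfo &MRI) {
  if (MI.getOpcode() != TargetOpcode::COPY)
    return false;
  unsigned SrcReg = MI.getOperand(1).getReg();
  unsigned DstReg = MI.getOperand(0).getReg();
  // Only vreg-to-vreg copies with identical register classes are safe to merge
  // without re-constraining anything.
  if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
      !TargetRegisterInfo::isVirtualRegister(DstReg) ||
      MRI.getRegClass(SrcReg) != MRI.getRegClass(DstReg))
    return false;
  MRI.replaceRegWith(DstReg, SrcReg);             // rewrite every use of DstReg
  MI.eraseFromParentAndMarkDBGValuesForRemoval(); // drop the now-dead COPY
  return true;
}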
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index 88669bd68c00..5e77fcbb0ed9 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -46,50 +46,6 @@ bool InstructionSelector::constrainOperandRegToRegClass(
constrainRegToClass(MRI, TII, RBI, I, I.getOperand(OpIdx).getReg(), RC);
}
-bool InstructionSelector::constrainSelectedInstRegOperands(
- MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI,
- const RegisterBankInfo &RBI) const {
- MachineBasicBlock &MBB = *I.getParent();
- MachineFunction &MF = *MBB.getParent();
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
- for (unsigned OpI = 0, OpE = I.getNumExplicitOperands(); OpI != OpE; ++OpI) {
- MachineOperand &MO = I.getOperand(OpI);
-
- // There's nothing to be done on non-register operands.
- if (!MO.isReg())
- continue;
-
- DEBUG(dbgs() << "Converting operand: " << MO << '\n');
- assert(MO.isReg() && "Unsupported non-reg operand");
-
- unsigned Reg = MO.getReg();
- // Physical registers don't need to be constrained.
- if (TRI.isPhysicalRegister(Reg))
- continue;
-
- // Register operands with a value of 0 (e.g. predicate operands) don't need
- // to be constrained.
- if (Reg == 0)
- continue;
-
- // If the operand is a vreg, we should constrain its regclass, and only
- // insert COPYs if that's impossible.
- // constrainOperandRegClass does that for us.
- MO.setReg(constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, I.getDesc(),
- Reg, OpI));
-
- // Tie uses to defs as indicated in MCInstrDesc if this hasn't already been
- // done.
- if (MO.isUse()) {
- int DefIdx = I.getDesc().getOperandConstraint(OpI, MCOI::TIED_TO);
- if (DefIdx != -1 && !I.isRegTiedToUseOperand(DefIdx))
- I.tieOperands(DefIdx, OpI);
- }
- }
- return true;
-}
-
bool InstructionSelector::isOperandImmEqual(
const MachineOperand &MO, int64_t Value,
const MachineRegisterInfo &MRI) const {
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
new file mode 100644
index 000000000000..344f573a67f5
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
@@ -0,0 +1,101 @@
+//===- lib/CodeGen/GlobalISel/LegalityPredicates.cpp - Predicates ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A library of predicate factories to use for LegalityPredicate.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+
+using namespace llvm;
+
+LegalityPredicate LegalityPredicates::typeIs(unsigned TypeIdx, LLT Type) {
+ return
+ [=](const LegalityQuery &Query) { return Query.Types[TypeIdx] == Type; };
+}
+
+LegalityPredicate
+LegalityPredicates::typeInSet(unsigned TypeIdx,
+ std::initializer_list<LLT> TypesInit) {
+ SmallVector<LLT, 4> Types = TypesInit;
+ return [=](const LegalityQuery &Query) {
+ return std::find(Types.begin(), Types.end(), Query.Types[TypeIdx]) != Types.end();
+ };
+}
+
+LegalityPredicate LegalityPredicates::typePairInSet(
+ unsigned TypeIdx0, unsigned TypeIdx1,
+ std::initializer_list<std::pair<LLT, LLT>> TypesInit) {
+ SmallVector<std::pair<LLT, LLT>, 4> Types = TypesInit;
+ return [=](const LegalityQuery &Query) {
+ std::pair<LLT, LLT> Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1]};
+ return std::find(Types.begin(), Types.end(), Match) != Types.end();
+ };
+}
+
+LegalityPredicate LegalityPredicates::typePairAndMemSizeInSet(
+ unsigned TypeIdx0, unsigned TypeIdx1, unsigned MMOIdx,
+ std::initializer_list<TypePairAndMemSize> TypesAndMemSizeInit) {
+ SmallVector<TypePairAndMemSize, 4> TypesAndMemSize = TypesAndMemSizeInit;
+ return [=](const LegalityQuery &Query) {
+ TypePairAndMemSize Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1],
+ Query.MMODescrs[MMOIdx].Size};
+ return std::find(TypesAndMemSize.begin(), TypesAndMemSize.end(), Match) !=
+ TypesAndMemSize.end();
+ };
+}
+
+LegalityPredicate LegalityPredicates::isScalar(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ return Query.Types[TypeIdx].isScalar();
+ };
+}
+
+LegalityPredicate LegalityPredicates::narrowerThan(unsigned TypeIdx,
+ unsigned Size) {
+ return [=](const LegalityQuery &Query) {
+ const LLT &QueryTy = Query.Types[TypeIdx];
+ return QueryTy.isScalar() && QueryTy.getSizeInBits() < Size;
+ };
+}
+
+LegalityPredicate LegalityPredicates::widerThan(unsigned TypeIdx,
+ unsigned Size) {
+ return [=](const LegalityQuery &Query) {
+ const LLT &QueryTy = Query.Types[TypeIdx];
+ return QueryTy.isScalar() && QueryTy.getSizeInBits() > Size;
+ };
+}
+
+LegalityPredicate LegalityPredicates::sizeNotPow2(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ const LLT &QueryTy = Query.Types[TypeIdx];
+ return QueryTy.isScalar() && !isPowerOf2_32(QueryTy.getSizeInBits());
+ };
+}
+
+LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) {
+ return [=](const LegalityQuery &Query) {
+ return !isPowerOf2_32(Query.MMODescrs[MMOIdx].Size /* In Bytes */);
+ };
+}
+
+LegalityPredicate LegalityPredicates::numElementsNotPow2(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ const LLT &QueryTy = Query.Types[TypeIdx];
+    return QueryTy.isVector() && !isPowerOf2_32(QueryTy.getNumElements());
+ };
+}
+
+LegalityPredicate LegalityPredicates::atomicOrderingAtLeastOrStrongerThan(
+ unsigned MMOIdx, AtomicOrdering Ordering) {
+ return [=](const LegalityQuery &Query) {
+ return isAtLeastOrStrongerThan(Query.MMODescrs[MMOIdx].Ordering, Ordering);
+ };
+}
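For orientation, this is roughly how a backend would consume these predicate factories when describing its legality rules. The target name and opcode choice are hypothetical, and the sketch assumes the rule-based builder methods (legalIf, unsupportedIf) that accompany getActionDefinitionsBuilder in LegalizerInfo.h; it is not taken from any in-tree target.

// Sketch of a hypothetical target's LegalizerInfo constructor using the predicates above.
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
using namespace llvm;
using namespace LegalityPredicates;

struct MyTargetLegalizerInfo : public LegalizerInfo {
  MyTargetLegalizerInfo() {
    const LLT s32 = LLT::scalar(32);
    const LLT s64 = LLT::scalar(64);
    getActionDefinitionsBuilder(TargetOpcode::G_AND)
        .legalIf(typeInSet(0, {s32, s64}))   // 32/64-bit AND needs no change
        .unsupportedIf(sizeNotPow2(0));      // reject odd scalar widths outright
    computeTables();
  }
};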
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
new file mode 100644
index 000000000000..a29b32ecdc03
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
@@ -0,0 +1,51 @@
+//===- lib/CodeGen/GlobalISel/LegalizeMutations.cpp - Mutations -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A library of mutation factories to use for LegalizeMutation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+
+using namespace llvm;
+
+LegalizeMutation LegalizeMutations::changeTo(unsigned TypeIdx, LLT Ty) {
+ return
+ [=](const LegalityQuery &Query) { return std::make_pair(TypeIdx, Ty); };
+}
+
+LegalizeMutation LegalizeMutations::changeTo(unsigned TypeIdx,
+ unsigned FromTypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ return std::make_pair(TypeIdx, Query.Types[FromTypeIdx]);
+ };
+}
+
+LegalizeMutation LegalizeMutations::widenScalarToNextPow2(unsigned TypeIdx,
+ unsigned Min) {
+ return [=](const LegalityQuery &Query) {
+ unsigned NewSizeInBits =
+ 1 << Log2_32_Ceil(Query.Types[TypeIdx].getSizeInBits());
+ if (NewSizeInBits < Min)
+ NewSizeInBits = Min;
+ return std::make_pair(TypeIdx, LLT::scalar(NewSizeInBits));
+ };
+}
+
+LegalizeMutation LegalizeMutations::moreElementsToNextPow2(unsigned TypeIdx,
+ unsigned Min) {
+ return [=](const LegalityQuery &Query) {
+ const LLT &VecTy = Query.Types[TypeIdx];
+ unsigned NewNumElements = 1 << Log2_32_Ceil(VecTy.getNumElements());
+ if (NewNumElements < Min)
+ NewNumElements = Min;
+ return std::make_pair(
+ TypeIdx, LLT::vector(NewNumElements, VecTy.getScalarSizeInBits()));
+ };
+}
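The widenScalarToNextPow2 mutation above reduces to a small piece of arithmetic: round the current scalar width up to the next power of two, then clamp it to a minimum. A minimal sketch of just that computation, assuming only MathExtras.h; the helper name is invented for illustration.

#include <algorithm>
#include "llvm/Support/MathExtras.h"

// For (17, 32) this yields 32; for (40, 32) it yields 64; for (8, 32) it yields 32.
static unsigned nextPow2Width(unsigned SizeInBits, unsigned Min) {
  unsigned NewSize = 1u << llvm::Log2_32_Ceil(SizeInBits);
  return std::max(NewSize, Min);
}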
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
index f09b0d9f11e7..9a2aac998a84 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -47,6 +47,7 @@ Legalizer::Legalizer() : MachineFunctionPass(ID) {
void Legalizer::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetPassConfig>();
+ getSelectionDAGFallbackAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -72,7 +73,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
if (MF.getProperties().hasProperty(
MachineFunctionProperties::Property::FailedISel))
return false;
- DEBUG(dbgs() << "Legalize Machine IR for: " << MF.getName() << '\n');
+ LLVM_DEBUG(dbgs() << "Legalize Machine IR for: " << MF.getName() << '\n');
init(MF);
const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr);
@@ -112,7 +113,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
else
InstList.insert(MI);
}
- DEBUG(dbgs() << ".. .. New MI: " << *MI;);
+ LLVM_DEBUG(dbgs() << ".. .. New MI: " << *MI;);
});
const LegalizerInfo &LInfo(Helper.getLegalizerInfo());
LegalizationArtifactCombiner ArtCombiner(Helper.MIRBuilder, MF.getRegInfo(), LInfo);
@@ -127,7 +128,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
MachineInstr &MI = *InstList.pop_back_val();
assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode");
if (isTriviallyDead(MI, MRI)) {
- DEBUG(dbgs() << MI << "Is dead; erasing.\n");
+ LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n");
MI.eraseFromParentAndMarkDBGValuesForRemoval();
continue;
}
@@ -148,7 +149,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
MachineInstr &MI = *ArtifactList.pop_back_val();
assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode");
if (isTriviallyDead(MI, MRI)) {
- DEBUG(dbgs() << MI << "Is dead; erasing.\n");
+ LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n");
RemoveDeadInstFromLists(&MI);
MI.eraseFromParentAndMarkDBGValuesForRemoval();
continue;
@@ -156,7 +157,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
SmallVector<MachineInstr *, 4> DeadInstructions;
if (ArtCombiner.tryCombineInstruction(MI, DeadInstructions)) {
for (auto *DeadMI : DeadInstructions) {
- DEBUG(dbgs() << ".. Erasing Dead Instruction " << *DeadMI);
+ LLVM_DEBUG(dbgs() << ".. Erasing Dead Instruction " << *DeadMI);
RemoveDeadInstFromLists(DeadMI);
DeadMI->eraseFromParentAndMarkDBGValuesForRemoval();
}
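Most of the churn in this file is the mechanical DEBUG to LLVM_DEBUG rename; the macro's contract is unchanged. It compiles away in release builds and, in asserts builds, prints only when the pass's DEBUG_TYPE is selected. A minimal sketch of the usage pattern, with a made-up pass name:

#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "my-pass" // illustrative; must be defined before LLVM_DEBUG is used

void noteProgress(unsigned N) {
  // No-op in release builds; in asserts builds it prints only under
  // `-debug` or `-debug-only=my-pass`.
  LLVM_DEBUG(llvm::dbgs() << "processed " << N << " instructions\n");
}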
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 6bebe180fefd..87086af121b7 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -26,6 +26,7 @@
#define DEBUG_TYPE "legalizer"
using namespace llvm;
+using namespace LegalizeActions;
LegalizerHelper::LegalizerHelper(MachineFunction &MF)
: MRI(MF.getRegInfo()), LI(*MF.getSubtarget().getLegalizerInfo()) {
@@ -34,34 +35,34 @@ LegalizerHelper::LegalizerHelper(MachineFunction &MF)
LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
- DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));
+ LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));
- auto Action = LI.getAction(MI, MRI);
- switch (std::get<0>(Action)) {
- case LegalizerInfo::Legal:
- DEBUG(dbgs() << ".. Already legal\n");
+ auto Step = LI.getAction(MI, MRI);
+ switch (Step.Action) {
+ case Legal:
+ LLVM_DEBUG(dbgs() << ".. Already legal\n");
return AlreadyLegal;
- case LegalizerInfo::Libcall:
- DEBUG(dbgs() << ".. Convert to libcall\n");
+ case Libcall:
+ LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
return libcall(MI);
- case LegalizerInfo::NarrowScalar:
- DEBUG(dbgs() << ".. Narrow scalar\n");
- return narrowScalar(MI, std::get<1>(Action), std::get<2>(Action));
- case LegalizerInfo::WidenScalar:
- DEBUG(dbgs() << ".. Widen scalar\n");
- return widenScalar(MI, std::get<1>(Action), std::get<2>(Action));
- case LegalizerInfo::Lower:
- DEBUG(dbgs() << ".. Lower\n");
- return lower(MI, std::get<1>(Action), std::get<2>(Action));
- case LegalizerInfo::FewerElements:
- DEBUG(dbgs() << ".. Reduce number of elements\n");
- return fewerElementsVector(MI, std::get<1>(Action), std::get<2>(Action));
- case LegalizerInfo::Custom:
- DEBUG(dbgs() << ".. Custom legalization\n");
+ case NarrowScalar:
+ LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
+ return narrowScalar(MI, Step.TypeIdx, Step.NewType);
+ case WidenScalar:
+ LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
+ return widenScalar(MI, Step.TypeIdx, Step.NewType);
+ case Lower:
+ LLVM_DEBUG(dbgs() << ".. Lower\n");
+ return lower(MI, Step.TypeIdx, Step.NewType);
+ case FewerElements:
+ LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
+ return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
+ case Custom:
+ LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
return LI.legalizeCustom(MI, MRI, MIRBuilder) ? Legalized
: UnableToLegalize;
default:
- DEBUG(dbgs() << ".. Unable to legalize\n");
+ LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
return UnableToLegalize;
}
}
@@ -103,6 +104,9 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
case TargetOpcode::G_FPOW:
return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32;
+ case TargetOpcode::G_FMA:
+ assert((Size == 32 || Size == 64) && "Unsupported size");
+ return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;
}
llvm_unreachable("Unknown libcall function");
}
@@ -123,13 +127,47 @@ llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
return LegalizerHelper::Legalized;
}
+// Useful for libcalls where all operands have the same type.
static LegalizerHelper::LegalizeResult
simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
Type *OpType) {
auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
+
+ SmallVector<CallLowering::ArgInfo, 3> Args;
+ for (unsigned i = 1; i < MI.getNumOperands(); i++)
+ Args.push_back({MI.getOperand(i).getReg(), OpType});
return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
- {{MI.getOperand(1).getReg(), OpType},
- {MI.getOperand(2).getReg(), OpType}});
+ Args);
+}
+
+static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
+ Type *FromType) {
+ auto ToMVT = MVT::getVT(ToType);
+ auto FromMVT = MVT::getVT(FromType);
+
+ switch (Opcode) {
+ case TargetOpcode::G_FPEXT:
+ return RTLIB::getFPEXT(FromMVT, ToMVT);
+ case TargetOpcode::G_FPTRUNC:
+ return RTLIB::getFPROUND(FromMVT, ToMVT);
+ case TargetOpcode::G_FPTOSI:
+ return RTLIB::getFPTOSINT(FromMVT, ToMVT);
+ case TargetOpcode::G_FPTOUI:
+ return RTLIB::getFPTOUINT(FromMVT, ToMVT);
+ case TargetOpcode::G_SITOFP:
+ return RTLIB::getSINTTOFP(FromMVT, ToMVT);
+ case TargetOpcode::G_UITOFP:
+ return RTLIB::getUINTTOFP(FromMVT, ToMVT);
+ }
+ llvm_unreachable("Unsupported libcall function");
+}
+
+static LegalizerHelper::LegalizeResult
+conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
+ Type *FromType) {
+ RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
+ return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
+ {{MI.getOperand(1).getReg(), FromType}});
}
LegalizerHelper::LegalizeResult
@@ -157,6 +195,7 @@ LegalizerHelper::libcall(MachineInstr &MI) {
case TargetOpcode::G_FSUB:
case TargetOpcode::G_FMUL:
case TargetOpcode::G_FDIV:
+ case TargetOpcode::G_FMA:
case TargetOpcode::G_FPOW:
case TargetOpcode::G_FREM: {
Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
@@ -165,6 +204,59 @@ LegalizerHelper::libcall(MachineInstr &MI) {
return Status;
break;
}
+ case TargetOpcode::G_FPEXT: {
+ // FIXME: Support other floating point types (half, fp128 etc)
+ unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+ unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ if (ToSize != 64 || FromSize != 32)
+ return UnableToLegalize;
+ LegalizeResult Status = conversionLibcall(
+ MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx));
+ if (Status != Legalized)
+ return Status;
+ break;
+ }
+ case TargetOpcode::G_FPTRUNC: {
+ // FIXME: Support other floating point types (half, fp128 etc)
+ unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+ unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ if (ToSize != 32 || FromSize != 64)
+ return UnableToLegalize;
+ LegalizeResult Status = conversionLibcall(
+ MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx));
+ if (Status != Legalized)
+ return Status;
+ break;
+ }
+ case TargetOpcode::G_FPTOSI:
+ case TargetOpcode::G_FPTOUI: {
+ // FIXME: Support other types
+ unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+ unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ if (ToSize != 32 || (FromSize != 32 && FromSize != 64))
+ return UnableToLegalize;
+ LegalizeResult Status = conversionLibcall(
+ MI, MIRBuilder, Type::getInt32Ty(Ctx),
+ FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
+ if (Status != Legalized)
+ return Status;
+ break;
+ }
+ case TargetOpcode::G_SITOFP:
+ case TargetOpcode::G_UITOFP: {
+ // FIXME: Support other types
+ unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+ unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ if (FromSize != 32 || (ToSize != 32 && ToSize != 64))
+ return UnableToLegalize;
+ LegalizeResult Status = conversionLibcall(
+ MI, MIRBuilder,
+ ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
+ Type::getInt32Ty(Ctx));
+ if (Status != Legalized)
+ return Status;
+ break;
+ }
}
MI.eraseFromParent();
@@ -180,8 +272,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
MIRBuilder.setInstr(MI);
- int64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
- int64_t NarrowSize = NarrowTy.getSizeInBits();
+ uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ uint64_t NarrowSize = NarrowTy.getSizeInBits();
switch (MI.getOpcode()) {
default:
@@ -194,11 +286,9 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
int NumParts = SizeOp0 / NarrowSize;
SmallVector<unsigned, 2> DstRegs;
- for (int i = 0; i < NumParts; ++i) {
- unsigned Dst = MRI.createGenericVirtualRegister(NarrowTy);
- MIRBuilder.buildUndef(Dst);
- DstRegs.push_back(Dst);
- }
+ for (int i = 0; i < NumParts; ++i)
+ DstRegs.push_back(
+ MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());
MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
MI.eraseFromParent();
return Legalized;
@@ -249,8 +339,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
unsigned OpReg = MI.getOperand(0).getReg();
- int64_t OpStart = MI.getOperand(2).getImm();
- int64_t OpSize = MRI.getType(OpReg).getSizeInBits();
+ uint64_t OpStart = MI.getOperand(2).getImm();
+ uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
for (int i = 0; i < NumParts; ++i) {
unsigned SrcStart = i * NarrowSize;
@@ -265,7 +355,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
// OpSegStart is where this destination segment would start in OpReg if it
// extended infinitely in both directions.
- int64_t ExtractOffset, SegSize;
+ int64_t ExtractOffset;
+ uint64_t SegSize;
if (OpStart < SrcStart) {
ExtractOffset = 0;
SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
@@ -301,8 +392,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
unsigned OpReg = MI.getOperand(2).getReg();
- int64_t OpStart = MI.getOperand(3).getImm();
- int64_t OpSize = MRI.getType(OpReg).getSizeInBits();
+ uint64_t OpStart = MI.getOperand(3).getImm();
+ uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
for (int i = 0; i < NumParts; ++i) {
unsigned DstStart = i * NarrowSize;
@@ -319,7 +410,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
// OpSegStart is where this destination segment would start in OpReg if it
// extended infinitely in both directions.
- int64_t ExtractOffset, InsertOffset, SegSize;
+ int64_t ExtractOffset, InsertOffset;
+ uint64_t SegSize;
if (OpStart < DstStart) {
InsertOffset = 0;
ExtractOffset = DstStart - OpStart;
@@ -353,6 +445,14 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
// NarrowSize.
if (SizeOp0 % NarrowSize != 0)
return UnableToLegalize;
+
+ const auto &MMO = **MI.memoperands_begin();
+ // This implementation doesn't work for atomics. Give up instead of doing
+ // something invalid.
+ if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
+ MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
+ return UnableToLegalize;
+
int NumParts = SizeOp0 / NarrowSize;
LLT OffsetTy = LLT::scalar(
MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());
@@ -363,12 +463,16 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
unsigned SrcReg = 0;
unsigned Adjustment = i * NarrowSize / 8;
+ MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
+ MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
+ NarrowSize / 8, i == 0 ? MMO.getAlignment() : NarrowSize / 8,
+ MMO.getAAInfo(), MMO.getRanges(), MMO.getSyncScopeID(),
+ MMO.getOrdering(), MMO.getFailureOrdering());
+
MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy,
Adjustment);
- // TODO: This is conservatively correct, but we probably want to split the
- // memory operands in the future.
- MIRBuilder.buildLoad(DstReg, SrcReg, **MI.memoperands_begin());
+ MIRBuilder.buildLoad(DstReg, SrcReg, *SplitMMO);
DstRegs.push_back(DstReg);
}
@@ -382,6 +486,14 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
// NarrowSize.
if (SizeOp0 % NarrowSize != 0)
return UnableToLegalize;
+
+ const auto &MMO = **MI.memoperands_begin();
+ // This implementation doesn't work for atomics. Give up instead of doing
+ // something invalid.
+ if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
+ MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
+ return UnableToLegalize;
+
int NumParts = SizeOp0 / NarrowSize;
LLT OffsetTy = LLT::scalar(
MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());
@@ -393,12 +505,16 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
unsigned DstReg = 0;
unsigned Adjustment = i * NarrowSize / 8;
+ MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
+ MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
+ NarrowSize / 8, i == 0 ? MMO.getAlignment() : NarrowSize / 8,
+ MMO.getAAInfo(), MMO.getRanges(), MMO.getSyncScopeID(),
+ MMO.getOrdering(), MMO.getFailureOrdering());
+
MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy,
Adjustment);
- // TODO: This is conservatively correct, but we probably want to split the
- // memory operands in the future.
- MIRBuilder.buildStore(SrcRegs[i], DstReg, **MI.memoperands_begin());
+ MIRBuilder.buildStore(SrcRegs[i], DstReg, *SplitMMO);
}
MI.eraseFromParent();
return Legalized;
@@ -475,6 +591,22 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
}
}
+void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
+ unsigned OpIdx, unsigned ExtOpcode) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ auto ExtB = MIRBuilder.buildInstr(ExtOpcode, WideTy, MO.getReg());
+ MO.setReg(ExtB->getOperand(0).getReg());
+}
+
+void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
+ unsigned OpIdx, unsigned TruncOpcode) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+ MIRBuilder.buildInstr(TruncOpcode, MO.getReg(), DstExt);
+ MO.setReg(DstExt);
+}
+
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
MIRBuilder.setInstr(MI);
@@ -482,303 +614,201 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
switch (MI.getOpcode()) {
default:
return UnableToLegalize;
+
case TargetOpcode::G_ADD:
case TargetOpcode::G_AND:
case TargetOpcode::G_MUL:
case TargetOpcode::G_OR:
case TargetOpcode::G_XOR:
case TargetOpcode::G_SUB:
- case TargetOpcode::G_SHL: {
// Perform operation at larger width (any extension is fine here, high bits
// don't affect the result) and then truncate the result back to the
// original type.
- unsigned Src1Ext = MRI.createGenericVirtualRegister(WideTy);
- unsigned Src2Ext = MRI.createGenericVirtualRegister(WideTy);
- MIRBuilder.buildAnyExt(Src1Ext, MI.getOperand(1).getReg());
- MIRBuilder.buildAnyExt(Src2Ext, MI.getOperand(2).getReg());
-
- unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
- MIRBuilder.buildInstr(MI.getOpcode())
- .addDef(DstExt)
- .addUse(Src1Ext)
- .addUse(Src2Ext);
-
- MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt);
- MI.eraseFromParent();
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
+ widenScalarDst(MI, WideTy);
+ MIRBuilder.recordInsertion(&MI);
return Legalized;
- }
+
+ case TargetOpcode::G_SHL:
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+ // The "number of bits to shift" operand must preserve its value as an
+ // unsigned integer:
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
+ widenScalarDst(MI, WideTy);
+ MIRBuilder.recordInsertion(&MI);
+ return Legalized;
+
case TargetOpcode::G_SDIV:
- case TargetOpcode::G_UDIV:
case TargetOpcode::G_SREM:
- case TargetOpcode::G_UREM:
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
+ widenScalarDst(MI, WideTy);
+ MIRBuilder.recordInsertion(&MI);
+ return Legalized;
+
case TargetOpcode::G_ASHR:
- case TargetOpcode::G_LSHR: {
- unsigned ExtOp = MI.getOpcode() == TargetOpcode::G_SDIV ||
- MI.getOpcode() == TargetOpcode::G_SREM ||
- MI.getOpcode() == TargetOpcode::G_ASHR
- ? TargetOpcode::G_SEXT
- : TargetOpcode::G_ZEXT;
-
- unsigned LHSExt = MRI.createGenericVirtualRegister(WideTy);
- MIRBuilder.buildInstr(ExtOp).addDef(LHSExt).addUse(
- MI.getOperand(1).getReg());
-
- unsigned RHSExt = MRI.createGenericVirtualRegister(WideTy);
- MIRBuilder.buildInstr(ExtOp).addDef(RHSExt).addUse(
- MI.getOperand(2).getReg());
-
- unsigned ResExt = MRI.createGenericVirtualRegister(WideTy);
- MIRBuilder.buildInstr(MI.getOpcode())
- .addDef(ResExt)
- .addUse(LHSExt)
- .addUse(RHSExt);
-
- MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), ResExt);
- MI.eraseFromParent();
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
+ // The "number of bits to shift" operand must preserve its value as an
+ // unsigned integer:
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
+ widenScalarDst(MI, WideTy);
+ MIRBuilder.recordInsertion(&MI);
return Legalized;
- }
- case TargetOpcode::G_SELECT: {
+
+ case TargetOpcode::G_UDIV:
+ case TargetOpcode::G_UREM:
+ case TargetOpcode::G_LSHR:
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
+ widenScalarDst(MI, WideTy);
+ MIRBuilder.recordInsertion(&MI);
+ return Legalized;
+
+ case TargetOpcode::G_SELECT:
if (TypeIdx != 0)
return UnableToLegalize;
-
// Perform operation at larger width (any extension is fine here, high bits
// don't affect the result) and then truncate the result back to the
// original type.
- unsigned Src1Ext = MRI.createGenericVirtualRegister(WideTy);
- unsigned Src2Ext = MRI.createGenericVirtualRegister(WideTy);
- MIRBuilder.buildAnyExt(Src1Ext, MI.getOperand(2).getReg());
- MIRBuilder.buildAnyExt(Src2Ext, MI.getOperand(3).getReg());
-
- unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
- MIRBuilder.buildInstr(TargetOpcode::G_SELECT)
- .addDef(DstExt)
- .addReg(MI.getOperand(1).getReg())
- .addUse(Src1Ext)
- .addUse(Src2Ext);
-
- MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt);
- MI.eraseFromParent();
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
+ widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
+ widenScalarDst(MI, WideTy);
+ MIRBuilder.recordInsertion(&MI);
return Legalized;
- }
+
case TargetOpcode::G_FPTOSI:
- case TargetOpcode::G_FPTOUI: {
+ case TargetOpcode::G_FPTOUI:
if (TypeIdx != 0)
return UnableToLegalize;
-
- unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
- MIRBuilder.buildInstr(MI.getOpcode())
- .addDef(DstExt)
- .addUse(MI.getOperand(1).getReg());
-
- MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt);
- MI.eraseFromParent();
+ widenScalarDst(MI, WideTy);
+ MIRBuilder.recordInsertion(&MI);
return Legalized;
- }
+
case TargetOpcode::G_SITOFP:
- case TargetOpcode::G_UITOFP: {
if (TypeIdx != 1)
return UnableToLegalize;
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
+ MIRBuilder.recordInsertion(&MI);
+ return Legalized;
- unsigned Src = MI.getOperand(1).getReg();
- unsigned SrcExt = MRI.createGenericVirtualRegister(WideTy);
-
- if (MI.getOpcode() == TargetOpcode::G_SITOFP) {
- MIRBuilder.buildSExt(SrcExt, Src);
- } else {
- assert(MI.getOpcode() == TargetOpcode::G_UITOFP && "Unexpected conv op");
- MIRBuilder.buildZExt(SrcExt, Src);
- }
-
- MIRBuilder.buildInstr(MI.getOpcode())
- .addDef(MI.getOperand(0).getReg())
- .addUse(SrcExt);
-
- MI.eraseFromParent();
+ case TargetOpcode::G_UITOFP:
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
+ MIRBuilder.recordInsertion(&MI);
return Legalized;
- }
- case TargetOpcode::G_INSERT: {
+
+ case TargetOpcode::G_INSERT:
if (TypeIdx != 0)
return UnableToLegalize;
-
- unsigned Src = MI.getOperand(1).getReg();
- unsigned SrcExt = MRI.createGenericVirtualRegister(WideTy);
- MIRBuilder.buildAnyExt(SrcExt, Src);
-
- unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
- auto MIB = MIRBuilder.buildInsert(DstExt, SrcExt, MI.getOperand(2).getReg(),
- MI.getOperand(3).getImm());
- for (unsigned OpNum = 4; OpNum < MI.getNumOperands(); OpNum += 2) {
- MIB.addReg(MI.getOperand(OpNum).getReg());
- MIB.addImm(MI.getOperand(OpNum + 1).getImm());
- }
-
- MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt);
- MI.eraseFromParent();
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+ widenScalarDst(MI, WideTy);
+ MIRBuilder.recordInsertion(&MI);
return Legalized;
- }
- case TargetOpcode::G_LOAD: {
- assert(alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) ==
- WideTy.getSizeInBits() &&
- "illegal to increase number of bytes loaded");
-
- unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
- MIRBuilder.buildLoad(DstExt, MI.getOperand(1).getReg(),
- **MI.memoperands_begin());
- MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt);
- MI.eraseFromParent();
+
+ case TargetOpcode::G_LOAD:
+    // For some types like i24, we might try to widen to i32. To properly handle
+    // this we should be using a dedicated extending load; until then, avoid
+    // trying to legalize.
+ if (alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) !=
+ WideTy.getSizeInBits())
+ return UnableToLegalize;
+ LLVM_FALLTHROUGH;
+ case TargetOpcode::G_SEXTLOAD:
+ case TargetOpcode::G_ZEXTLOAD:
+ widenScalarDst(MI, WideTy);
+ MIRBuilder.recordInsertion(&MI);
return Legalized;
- }
+
case TargetOpcode::G_STORE: {
if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(1) ||
WideTy != LLT::scalar(8))
return UnableToLegalize;
- auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
- auto Content = TLI.getBooleanContents(false, false);
-
- unsigned ExtOp = TargetOpcode::G_ANYEXT;
- if (Content == TargetLoweringBase::ZeroOrOneBooleanContent)
- ExtOp = TargetOpcode::G_ZEXT;
- else if (Content == TargetLoweringBase::ZeroOrNegativeOneBooleanContent)
- ExtOp = TargetOpcode::G_SEXT;
- else
- ExtOp = TargetOpcode::G_ANYEXT;
-
- unsigned SrcExt = MRI.createGenericVirtualRegister(WideTy);
- MIRBuilder.buildInstr(ExtOp).addDef(SrcExt).addUse(
- MI.getOperand(0).getReg());
- MIRBuilder.buildStore(SrcExt, MI.getOperand(1).getReg(),
- **MI.memoperands_begin());
- MI.eraseFromParent();
+ widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ZEXT);
+ MIRBuilder.recordInsertion(&MI);
return Legalized;
}
case TargetOpcode::G_CONSTANT: {
- unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
- MIRBuilder.buildConstant(DstExt, *MI.getOperand(1).getCImm());
- MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt);
- MI.eraseFromParent();
+ MachineOperand &SrcMO = MI.getOperand(1);
+ LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
+ const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
+ SrcMO.setCImm(ConstantInt::get(Ctx, Val));
+
+ widenScalarDst(MI, WideTy);
+ MIRBuilder.recordInsertion(&MI);
return Legalized;
}
case TargetOpcode::G_FCONSTANT: {
- unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
- const ConstantFP *CFP = MI.getOperand(1).getFPImm();
- APFloat Val = CFP->getValueAPF();
+ MachineOperand &SrcMO = MI.getOperand(1);
LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
- auto LLT2Sem = [](LLT Ty) {
- switch (Ty.getSizeInBits()) {
- case 32:
- return &APFloat::IEEEsingle();
- break;
- case 64:
- return &APFloat::IEEEdouble();
- break;
- default:
- llvm_unreachable("Unhandled fp widen type");
- }
- };
+ APFloat Val = SrcMO.getFPImm()->getValueAPF();
bool LosesInfo;
- Val.convert(*LLT2Sem(WideTy), APFloat::rmTowardZero, &LosesInfo);
- MIRBuilder.buildFConstant(DstExt, *ConstantFP::get(Ctx, Val));
- MIRBuilder.buildFPTrunc(MI.getOperand(0).getReg(), DstExt);
- MI.eraseFromParent();
+ switch (WideTy.getSizeInBits()) {
+ case 32:
+ Val.convert(APFloat::IEEEsingle(), APFloat::rmTowardZero, &LosesInfo);
+ break;
+ case 64:
+ Val.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &LosesInfo);
+ break;
+ default:
+ llvm_unreachable("Unhandled fp widen type");
+ }
+ SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
+
+ widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+ MIRBuilder.recordInsertion(&MI);
return Legalized;
}
- case TargetOpcode::G_BRCOND: {
- unsigned TstExt = MRI.createGenericVirtualRegister(WideTy);
- MIRBuilder.buildAnyExt(TstExt, MI.getOperand(0).getReg());
- MIRBuilder.buildBrCond(TstExt, *MI.getOperand(1).getMBB());
- MI.eraseFromParent();
+ case TargetOpcode::G_BRCOND:
+ widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
+ MIRBuilder.recordInsertion(&MI);
return Legalized;
- }
- case TargetOpcode::G_FCMP: {
- unsigned Op0Ext, Op1Ext, DstReg;
- unsigned Cmp1 = MI.getOperand(2).getReg();
- unsigned Cmp2 = MI.getOperand(3).getReg();
- if (TypeIdx == 0) {
- Op0Ext = Cmp1;
- Op1Ext = Cmp2;
- DstReg = MRI.createGenericVirtualRegister(WideTy);
- } else {
- Op0Ext = MRI.createGenericVirtualRegister(WideTy);
- Op1Ext = MRI.createGenericVirtualRegister(WideTy);
- DstReg = MI.getOperand(0).getReg();
- MIRBuilder.buildInstr(TargetOpcode::G_FPEXT, Op0Ext, Cmp1);
- MIRBuilder.buildInstr(TargetOpcode::G_FPEXT, Op1Ext, Cmp2);
- }
- MIRBuilder.buildFCmp(
- static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()),
- DstReg, Op0Ext, Op1Ext);
+
+ case TargetOpcode::G_FCMP:
if (TypeIdx == 0)
- MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, MI.getOperand(0).getReg(),
- DstReg);
- MI.eraseFromParent();
- return Legalized;
- }
- case TargetOpcode::G_ICMP: {
- bool IsSigned = CmpInst::isSigned(
- static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()));
- unsigned Cmp1 = MI.getOperand(2).getReg();
- unsigned Cmp2 = MI.getOperand(3).getReg();
- unsigned Op0Ext, Op1Ext, DstReg;
- if (TypeIdx == 0) {
- Op0Ext = Cmp1;
- Op1Ext = Cmp2;
- DstReg = MRI.createGenericVirtualRegister(WideTy);
- } else {
- Op0Ext = MRI.createGenericVirtualRegister(WideTy);
- Op1Ext = MRI.createGenericVirtualRegister(WideTy);
- DstReg = MI.getOperand(0).getReg();
- if (IsSigned) {
- MIRBuilder.buildSExt(Op0Ext, Cmp1);
- MIRBuilder.buildSExt(Op1Ext, Cmp2);
- } else {
- MIRBuilder.buildZExt(Op0Ext, Cmp1);
- MIRBuilder.buildZExt(Op1Ext, Cmp2);
- }
+ widenScalarDst(MI, WideTy);
+ else {
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
+ widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
}
- MIRBuilder.buildICmp(
- static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()),
- DstReg, Op0Ext, Op1Ext);
+ MIRBuilder.recordInsertion(&MI);
+ return Legalized;
+
+ case TargetOpcode::G_ICMP:
if (TypeIdx == 0)
- MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, MI.getOperand(0).getReg(),
- DstReg);
- MI.eraseFromParent();
+ widenScalarDst(MI, WideTy);
+ else {
+ unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
+ MI.getOperand(1).getPredicate()))
+ ? TargetOpcode::G_SEXT
+ : TargetOpcode::G_ZEXT;
+ widenScalarSrc(MI, WideTy, 2, ExtOpcode);
+ widenScalarSrc(MI, WideTy, 3, ExtOpcode);
+ }
+ MIRBuilder.recordInsertion(&MI);
return Legalized;
- }
- case TargetOpcode::G_GEP: {
+
+ case TargetOpcode::G_GEP:
assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
- unsigned OffsetExt = MRI.createGenericVirtualRegister(WideTy);
- MIRBuilder.buildSExt(OffsetExt, MI.getOperand(2).getReg());
- MI.getOperand(2).setReg(OffsetExt);
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
+ MIRBuilder.recordInsertion(&MI);
return Legalized;
- }
+
case TargetOpcode::G_PHI: {
assert(TypeIdx == 0 && "Expecting only Idx 0");
- auto getExtendedReg = [&](unsigned Reg, MachineBasicBlock &MBB) {
- auto FirstTermIt = MBB.getFirstTerminator();
- MIRBuilder.setInsertPt(MBB, FirstTermIt);
- MachineInstr *DefMI = MRI.getVRegDef(Reg);
- MachineInstrBuilder MIB;
- if (DefMI->getOpcode() == TargetOpcode::G_TRUNC)
- MIB = MIRBuilder.buildAnyExtOrTrunc(WideTy,
- DefMI->getOperand(1).getReg());
- else
- MIB = MIRBuilder.buildAnyExt(WideTy, Reg);
- return MIB->getOperand(0).getReg();
- };
- auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI, WideTy);
- for (auto OpIt = MI.operands_begin() + 1, OpE = MI.operands_end();
- OpIt != OpE;) {
- unsigned Reg = OpIt++->getReg();
- MachineBasicBlock *OpMBB = OpIt++->getMBB();
- MIB.addReg(getExtendedReg(Reg, *OpMBB));
- MIB.addMBB(OpMBB);
+
+ for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
+ MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
+ MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
+ widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
}
- auto *MBB = MI.getParent();
- MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
- MIRBuilder.buildTrunc(MI.getOperand(0).getReg(),
- MIB->getOperand(0).getReg());
- MI.eraseFromParent();
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
+ widenScalarDst(MI, WideTy);
+ MIRBuilder.recordInsertion(&MI);
return Legalized;
}
}
@@ -874,11 +904,10 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
}
ConstantFP &ZeroForNegation =
*cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
- unsigned Zero = MRI.createGenericVirtualRegister(Ty);
- MIRBuilder.buildFConstant(Zero, ZeroForNegation);
+ auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
MIRBuilder.buildInstr(TargetOpcode::G_FSUB)
.addDef(Res)
- .addUse(Zero)
+ .addUse(Zero->getOperand(0).getReg())
.addUse(MI.getOperand(1).getReg());
MI.eraseFromParent();
return Legalized;
@@ -887,7 +916,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
// Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
// First, check if G_FNEG is marked as Lower. If so, we may
// end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
- if (LI.getAction({G_FNEG, Ty}).first == LegalizerInfo::Lower)
+ if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
return UnableToLegalize;
unsigned Res = MI.getOperand(0).getReg();
unsigned LHS = MI.getOperand(1).getReg();
@@ -913,6 +942,48 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_LOAD:
+ case TargetOpcode::G_SEXTLOAD:
+ case TargetOpcode::G_ZEXTLOAD: {
+ // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned PtrReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ auto &MMO = **MI.memoperands_begin();
+
+ if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) {
+ // In the case of G_LOAD, this was a non-extending load already and we're
+ // about to lower to the same instruction.
+ if (MI.getOpcode() == TargetOpcode::G_LOAD)
+ return UnableToLegalize;
+ MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ if (DstTy.isScalar()) {
+ unsigned TmpReg = MRI.createGenericVirtualRegister(
+ LLT::scalar(MMO.getSize() /* in bytes */ * 8));
+ MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode");
+ case TargetOpcode::G_LOAD:
+ MIRBuilder.buildAnyExt(DstReg, TmpReg);
+ break;
+ case TargetOpcode::G_SEXTLOAD:
+ MIRBuilder.buildSExt(DstReg, TmpReg);
+ break;
+ case TargetOpcode::G_ZEXTLOAD:
+ MIRBuilder.buildZExt(DstReg, TmpReg);
+ break;
+ }
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ return UnableToLegalize;
+ }
}
}
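The new G_SEXTLOAD/G_ZEXTLOAD lowering above splits an extending load into a load of the memory width followed by an explicit extend. A condensed sketch of that shape for the zero-extending case, reusing the builder calls shown in the hunk; the function name is illustrative only and error handling is omitted.

// Lower `%d:_(s32) = G_ZEXTLOAD %p (load 2)` into a 16-bit G_LOAD plus G_ZEXT.
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;

static void lowerZExtLoadSketch(MachineInstr &MI, MachineRegisterInfo &MRI,
                                MachineIRBuilder &MIRBuilder) {
  unsigned DstReg = MI.getOperand(0).getReg();
  unsigned PtrReg = MI.getOperand(1).getReg();
  MachineMemOperand &MMO = **MI.memoperands_begin();
  // Load exactly what memory provides...
  unsigned TmpReg =
      MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSize() * 8));
  MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
  // ...then widen the result to the destination type with the matching extend.
  MIRBuilder.buildZExt(DstReg, TmpReg);
  MI.eraseFromParent();
}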
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index 9c27c59a0654..ae061b64a38c 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -24,12 +24,87 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LowLevelTypeImpl.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <map>
+
using namespace llvm;
+using namespace LegalizeActions;
+
+#define DEBUG_TYPE "legalizer-info"
+
+cl::opt<bool> llvm::DisableGISelLegalityCheck(
+ "disable-gisel-legality-check",
+ cl::desc("Don't verify that MIR is fully legal between GlobalISel passes"),
+ cl::Hidden);
+
+raw_ostream &LegalityQuery::print(raw_ostream &OS) const {
+ OS << Opcode << ", Tys={";
+ for (const auto &Type : Types) {
+ OS << Type << ", ";
+ }
+ OS << "}, Opcode=";
+
+ OS << Opcode << ", MMOs={";
+ for (const auto &MMODescr : MMODescrs) {
+ OS << MMODescr.Size << ", ";
+ }
+ OS << "}";
+
+ return OS;
+}
+
+LegalizeActionStep LegalizeRuleSet::apply(const LegalityQuery &Query) const {
+ LLVM_DEBUG(dbgs() << "Applying legalizer ruleset to: "; Query.print(dbgs());
+ dbgs() << "\n");
+ if (Rules.empty()) {
+ LLVM_DEBUG(dbgs() << ".. fallback to legacy rules (no rules defined)\n");
+ return {LegalizeAction::UseLegacyRules, 0, LLT{}};
+ }
+ for (const auto &Rule : Rules) {
+ if (Rule.match(Query)) {
+ LLVM_DEBUG(dbgs() << ".. match\n");
+ std::pair<unsigned, LLT> Mutation = Rule.determineMutation(Query);
+ LLVM_DEBUG(dbgs() << ".. .. " << (unsigned)Rule.getAction() << ", "
+ << Mutation.first << ", " << Mutation.second << "\n");
+ assert((Query.Types[Mutation.first] != Mutation.second ||
+ Rule.getAction() == Lower ||
+ Rule.getAction() == MoreElements ||
+ Rule.getAction() == FewerElements) &&
+ "Simple loop detected");
+ return {Rule.getAction(), Mutation.first, Mutation.second};
+ } else
+ LLVM_DEBUG(dbgs() << ".. no match\n");
+ }
+ LLVM_DEBUG(dbgs() << ".. unsupported\n");
+ return {LegalizeAction::Unsupported, 0, LLT{}};
+}
+
+bool LegalizeRuleSet::verifyTypeIdxsCoverage(unsigned NumTypeIdxs) const {
+#ifndef NDEBUG
+ if (Rules.empty()) {
+ LLVM_DEBUG(
+ dbgs() << ".. type index coverage check SKIPPED: no rules defined\n");
+ return true;
+ }
+ const int64_t FirstUncovered = TypeIdxsCovered.find_first_unset();
+ if (FirstUncovered < 0) {
+ LLVM_DEBUG(dbgs() << ".. type index coverage check SKIPPED:"
+ " user-defined predicate detected\n");
+ return true;
+ }
+ const bool AllCovered = (FirstUncovered >= NumTypeIdxs);
+ LLVM_DEBUG(dbgs() << ".. the first uncovered type index: " << FirstUncovered
+ << ", " << (AllCovered ? "OK" : "FAIL") << "\n");
+ return AllCovered;
+#else
+ return true;
+#endif
+}
LegalizerInfo::LegalizerInfo() : TablesInitialized(false) {
// Set defaults.
@@ -104,15 +179,16 @@ void LegalizerInfo::computeTables() {
if (TypeIdx < ScalarSizeChangeStrategies[OpcodeIdx].size() &&
ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx] != nullptr)
S = ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx];
- std::sort(ScalarSpecifiedActions.begin(), ScalarSpecifiedActions.end());
+ llvm::sort(ScalarSpecifiedActions.begin(),
+ ScalarSpecifiedActions.end());
checkPartialSizeAndActionsVector(ScalarSpecifiedActions);
setScalarAction(Opcode, TypeIdx, S(ScalarSpecifiedActions));
}
// 2. Handle pointer types
for (auto PointerSpecifiedActions : AddressSpace2SpecifiedActions) {
- std::sort(PointerSpecifiedActions.second.begin(),
- PointerSpecifiedActions.second.end());
+ llvm::sort(PointerSpecifiedActions.second.begin(),
+ PointerSpecifiedActions.second.end());
checkPartialSizeAndActionsVector(PointerSpecifiedActions.second);
// For pointer types, we assume that there isn't a meaningful way
// to change the number of bits used in the pointer.
@@ -124,8 +200,8 @@ void LegalizerInfo::computeTables() {
// 3. Handle vector types
SizeAndActionsVec ElementSizesSeen;
for (auto VectorSpecifiedActions : ElemSize2SpecifiedActions) {
- std::sort(VectorSpecifiedActions.second.begin(),
- VectorSpecifiedActions.second.end());
+ llvm::sort(VectorSpecifiedActions.second.begin(),
+ VectorSpecifiedActions.second.end());
const uint16_t ElementSize = VectorSpecifiedActions.first;
ElementSizesSeen.push_back({ElementSize, Legal});
checkPartialSizeAndActionsVector(VectorSpecifiedActions.second);
@@ -143,7 +219,7 @@ void LegalizerInfo::computeTables() {
Opcode, TypeIdx, ElementSize,
moreToWiderTypesAndLessToWidest(NumElementsActions));
}
- std::sort(ElementSizesSeen.begin(), ElementSizesSeen.end());
+ llvm::sort(ElementSizesSeen.begin(), ElementSizesSeen.end());
SizeChangeStrategy VectorElementSizeChangeStrategy =
&unsupportedForDifferentSizes;
if (TypeIdx < VectorElementSizeChangeStrategies[OpcodeIdx].size() &&
@@ -162,8 +238,8 @@ void LegalizerInfo::computeTables() {
// probably going to need specialized lookup structures for various types before
// we have any hope of doing well with something like <13 x i3>. Even the common
// cases should do better than what we have now.
-std::pair<LegalizerInfo::LegalizeAction, LLT>
-LegalizerInfo::getAction(const InstrAspect &Aspect) const {
+std::pair<LegalizeAction, LLT>
+LegalizerInfo::getAspectAction(const InstrAspect &Aspect) const {
assert(TablesInitialized && "backend forgot to call computeTables");
// These *have* to be implemented for now, they're the fundamental basis of
// how everything else is transformed.
@@ -186,9 +262,87 @@ static LLT getTypeFromTypeIdx(const MachineInstr &MI,
return MRI.getType(MI.getOperand(OpIdx).getReg());
}
-std::tuple<LegalizerInfo::LegalizeAction, unsigned, LLT>
+unsigned LegalizerInfo::getOpcodeIdxForOpcode(unsigned Opcode) const {
+ assert(Opcode >= FirstOp && Opcode <= LastOp && "Unsupported opcode");
+ return Opcode - FirstOp;
+}
+
+unsigned LegalizerInfo::getActionDefinitionsIdx(unsigned Opcode) const {
+ unsigned OpcodeIdx = getOpcodeIdxForOpcode(Opcode);
+ if (unsigned Alias = RulesForOpcode[OpcodeIdx].getAlias()) {
+ LLVM_DEBUG(dbgs() << ".. opcode " << Opcode << " is aliased to " << Alias
+ << "\n");
+ OpcodeIdx = getOpcodeIdxForOpcode(Alias);
+ LLVM_DEBUG(dbgs() << ".. opcode " << Alias << " is aliased to "
+ << RulesForOpcode[OpcodeIdx].getAlias() << "\n");
+ assert(RulesForOpcode[OpcodeIdx].getAlias() == 0 && "Cannot chain aliases");
+ }
+
+ return OpcodeIdx;
+}
+
+const LegalizeRuleSet &
+LegalizerInfo::getActionDefinitions(unsigned Opcode) const {
+ unsigned OpcodeIdx = getActionDefinitionsIdx(Opcode);
+ return RulesForOpcode[OpcodeIdx];
+}
+
+LegalizeRuleSet &LegalizerInfo::getActionDefinitionsBuilder(unsigned Opcode) {
+ unsigned OpcodeIdx = getActionDefinitionsIdx(Opcode);
+ auto &Result = RulesForOpcode[OpcodeIdx];
+ assert(!Result.isAliasedByAnother() && "Modifying this opcode will modify aliases");
+ return Result;
+}
+
+LegalizeRuleSet &LegalizerInfo::getActionDefinitionsBuilder(
+ std::initializer_list<unsigned> Opcodes) {
+ unsigned Representative = *Opcodes.begin();
+
+ assert(Opcodes.begin() != Opcodes.end() &&
+ Opcodes.begin() + 1 != Opcodes.end() &&
+ "Initializer list must have at least two opcodes");
+
+ for (auto I = Opcodes.begin() + 1, E = Opcodes.end(); I != E; ++I)
+ aliasActionDefinitions(Representative, *I);
+
+ auto &Return = getActionDefinitionsBuilder(Representative);
+ Return.setIsAliasedByAnother();
+ return Return;
+}
+
+void LegalizerInfo::aliasActionDefinitions(unsigned OpcodeTo,
+ unsigned OpcodeFrom) {
+ assert(OpcodeTo != OpcodeFrom && "Cannot alias to self");
+ assert(OpcodeTo >= FirstOp && OpcodeTo <= LastOp && "Unsupported opcode");
+ const unsigned OpcodeFromIdx = getOpcodeIdxForOpcode(OpcodeFrom);
+ RulesForOpcode[OpcodeFromIdx].aliasTo(OpcodeTo);
+}
+
+LegalizeActionStep
+LegalizerInfo::getAction(const LegalityQuery &Query) const {
+ LegalizeActionStep Step = getActionDefinitions(Query.Opcode).apply(Query);
+ if (Step.Action != LegalizeAction::UseLegacyRules) {
+ return Step;
+ }
+
+ for (unsigned i = 0; i < Query.Types.size(); ++i) {
+ auto Action = getAspectAction({Query.Opcode, i, Query.Types[i]});
+ if (Action.first != Legal) {
+ LLVM_DEBUG(dbgs() << ".. (legacy) Type " << i
+ << " Action=" << (unsigned)Action.first << ", "
+ << Action.second << "\n");
+ return {Action.first, i, Action.second};
+ } else
+ LLVM_DEBUG(dbgs() << ".. (legacy) Type " << i << " Legal\n");
+ }
+ LLVM_DEBUG(dbgs() << ".. (legacy) Legal\n");
+ return {Legal, 0, LLT{}};
+}
+
+LegalizeActionStep
LegalizerInfo::getAction(const MachineInstr &MI,
const MachineRegisterInfo &MRI) const {
+ SmallVector<LLT, 2> Types;
SmallBitVector SeenTypes(8);
const MCOperandInfo *OpInfo = MI.getDesc().OpInfo;
// FIXME: probably we'll need to cache the results here somehow?
@@ -205,16 +359,20 @@ LegalizerInfo::getAction(const MachineInstr &MI,
SeenTypes.set(TypeIdx);
LLT Ty = getTypeFromTypeIdx(MI, MRI, i, TypeIdx);
- auto Action = getAction({MI.getOpcode(), TypeIdx, Ty});
- if (Action.first != Legal)
- return std::make_tuple(Action.first, TypeIdx, Action.second);
+ Types.push_back(Ty);
}
- return std::make_tuple(Legal, 0, LLT{});
+
+ SmallVector<LegalityQuery::MemDesc, 2> MemDescrs;
+ for (const auto &MMO : MI.memoperands())
+ MemDescrs.push_back(
+ {MMO->getSize() /* in bytes */ * 8, MMO->getOrdering()});
+
+ return getAction({MI.getOpcode(), Types, MemDescrs});
}
bool LegalizerInfo::isLegal(const MachineInstr &MI,
const MachineRegisterInfo &MRI) const {
- return std::get<0>(getAction(MI, MRI)) == Legal;
+ return getAction(MI, MRI).Action == Legal;
}
bool LegalizerInfo::legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
@@ -312,17 +470,18 @@ LegalizerInfo::findAction(const SizeAndActionsVec &Vec, const uint32_t Size) {
case Unsupported:
return {Size, Unsupported};
case NotFound:
+ case UseLegacyRules:
llvm_unreachable("NotFound");
}
llvm_unreachable("Action has an unknown enum value");
}
-std::pair<LegalizerInfo::LegalizeAction, LLT>
+std::pair<LegalizeAction, LLT>
LegalizerInfo::findScalarLegalAction(const InstrAspect &Aspect) const {
assert(Aspect.Type.isScalar() || Aspect.Type.isPointer());
if (Aspect.Opcode < FirstOp || Aspect.Opcode > LastOp)
return {NotFound, LLT()};
- const unsigned OpcodeIdx = Aspect.Opcode - FirstOp;
+ const unsigned OpcodeIdx = getOpcodeIdxForOpcode(Aspect.Opcode);
if (Aspect.Type.isPointer() &&
AddrSpace2PointerActions[OpcodeIdx].find(Aspect.Type.getAddressSpace()) ==
AddrSpace2PointerActions[OpcodeIdx].end()) {
@@ -346,14 +505,14 @@ LegalizerInfo::findScalarLegalAction(const InstrAspect &Aspect) const {
SizeAndAction.first)};
}
-std::pair<LegalizerInfo::LegalizeAction, LLT>
+std::pair<LegalizeAction, LLT>
LegalizerInfo::findVectorLegalAction(const InstrAspect &Aspect) const {
assert(Aspect.Type.isVector());
// First legalize the vector element size, then legalize the number of
// lanes in the vector.
if (Aspect.Opcode < FirstOp || Aspect.Opcode > LastOp)
return {NotFound, Aspect.Type};
- const unsigned OpcodeIdx = Aspect.Opcode - FirstOp;
+ const unsigned OpcodeIdx = getOpcodeIdxForOpcode(Aspect.Opcode);
const unsigned TypeIdx = Aspect.Idx;
if (TypeIdx >= ScalarInVectorActions[OpcodeIdx].size())
return {NotFound, Aspect.Type};
@@ -380,3 +539,53 @@ LegalizerInfo::findVectorLegalAction(const InstrAspect &Aspect) const {
LLT::vector(NumElementsAndAction.first,
IntermediateType.getScalarSizeInBits())};
}
+
+/// \pre Type indices of every opcode form a dense set starting from 0.
+void LegalizerInfo::verify(const MCInstrInfo &MII) const {
+#ifndef NDEBUG
+ std::vector<unsigned> FailedOpcodes;
+ for (unsigned Opcode = FirstOp; Opcode <= LastOp; ++Opcode) {
+ const MCInstrDesc &MCID = MII.get(Opcode);
+ const unsigned NumTypeIdxs = std::accumulate(
+ MCID.opInfo_begin(), MCID.opInfo_end(), 0U,
+ [](unsigned Acc, const MCOperandInfo &OpInfo) {
+ return OpInfo.isGenericType()
+ ? std::max(OpInfo.getGenericTypeIndex() + 1U, Acc)
+ : Acc;
+ });
+ LLVM_DEBUG(dbgs() << MII.getName(Opcode) << " (opcode " << Opcode
+ << "): " << NumTypeIdxs << " type ind"
+ << (NumTypeIdxs == 1 ? "ex" : "ices") << "\n");
+ const LegalizeRuleSet &RuleSet = getActionDefinitions(Opcode);
+ if (!RuleSet.verifyTypeIdxsCoverage(NumTypeIdxs))
+ FailedOpcodes.push_back(Opcode);
+ }
+ if (!FailedOpcodes.empty()) {
+ errs() << "The following opcodes have ill-defined legalization rules:";
+ for (unsigned Opcode : FailedOpcodes)
+ errs() << " " << MII.getName(Opcode);
+ errs() << "\n";
+
+ report_fatal_error("ill-defined LegalizerInfo"
+ ", try -debug-only=legalizer-info for details");
+ }
+#endif
+}
+
+#ifndef NDEBUG
+// FIXME: This should be in the MachineVerifier, but it can't use the
+// LegalizerInfo as it's currently in the separate GlobalISel library.
+// Note that RegBankSelected property already checked in the verifier
+// has the same layering problem, but we only use inline methods so
+// end up not needing to link against the GlobalISel library.
+const MachineInstr *llvm::machineFunctionIsIllegal(const MachineFunction &MF) {
+ if (const LegalizerInfo *MLI = MF.getSubtarget().getLegalizerInfo()) {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (const MachineBasicBlock &MBB : MF)
+ for (const MachineInstr &MI : MBB)
+ if (isPreISelGenericOpcode(MI.getOpcode()) && !MLI->isLegal(MI, MRI))
+ return &MI;
+ }
+ return nullptr;
+}
+#endif
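Two of the additions in this file, the initializer-list getActionDefinitionsBuilder overload that aliases several opcodes to one rule set and the verify() coverage check, are intended to be used together from a backend constructor. A sketch with a hypothetical target; legalFor is assumed from the same builder API, and the MCInstrInfo would normally come from the subtarget's instruction info.

#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/MC/MCInstrInfo.h"
using namespace llvm;

struct MyTargetLegalizerInfo : public LegalizerInfo {
  explicit MyTargetLegalizerInfo(const MCInstrInfo &MII) {
    const LLT s32 = LLT::scalar(32);
    const LLT s64 = LLT::scalar(64);
    // One builder call covers the whole group; G_SUB and G_MUL are aliased to
    // G_ADD's rule set internally, so the rules only need to be written once.
    getActionDefinitionsBuilder(
        {TargetOpcode::G_ADD, TargetOpcode::G_SUB, TargetOpcode::G_MUL})
        .legalFor({s32, s64});
    computeTables();
    verify(MII); // asserts-only: reports opcodes whose rules miss a type index
  }
};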
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
index 8e16470b6f90..52b340753a50 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -44,6 +44,11 @@ bool Localizer::shouldLocalize(const MachineInstr &MI) {
}
}
+void Localizer::getAnalysisUsage(AnalysisUsage &AU) const {
+ getSelectionDAGFallbackAnalysisUsage(AU);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def,
MachineBasicBlock *&InsertMBB) {
MachineInstr &MIUse = *MOUse.getParent();
@@ -59,7 +64,7 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) {
MachineFunctionProperties::Property::FailedISel))
return false;
- DEBUG(dbgs() << "Localize instructions for: " << MF.getName() << '\n');
+ LLVM_DEBUG(dbgs() << "Localize instructions for: " << MF.getName() << '\n');
init(MF);
@@ -73,7 +78,7 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) {
for (MachineInstr &MI : MBB) {
if (LocalizedInstrs.count(&MI) || !shouldLocalize(MI))
continue;
- DEBUG(dbgs() << "Should localize: " << MI);
+ LLVM_DEBUG(dbgs() << "Should localize: " << MI);
assert(MI.getDesc().getNumDefs() == 1 &&
"More than one definition not supported yet");
unsigned Reg = MI.getOperand(0).getReg();
@@ -85,12 +90,12 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) {
MachineOperand &MOUse = *MOIt++;
// Check if the use is already local.
MachineBasicBlock *InsertMBB;
- DEBUG(MachineInstr &MIUse = *MOUse.getParent();
- dbgs() << "Checking use: " << MIUse
- << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n');
+ LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent();
+ dbgs() << "Checking use: " << MIUse
+ << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n');
if (isLocalUse(MOUse, MI, InsertMBB))
continue;
- DEBUG(dbgs() << "Fixing non-local use\n");
+ LLVM_DEBUG(dbgs() << "Fixing non-local use\n");
Changed = true;
auto MBBAndReg = std::make_pair(InsertMBB, Reg);
auto NewVRegIt = MBBWithLocalDef.find(MBBAndReg);
@@ -111,10 +116,10 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) {
LocalizedMI->getOperand(0).setReg(NewReg);
NewVRegIt =
MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first;
- DEBUG(dbgs() << "Inserted: " << *LocalizedMI);
+ LLVM_DEBUG(dbgs() << "Inserted: " << *LocalizedMI);
}
- DEBUG(dbgs() << "Update use with: " << printReg(NewVRegIt->second)
- << '\n');
+ LLVM_DEBUG(dbgs() << "Update use with: " << printReg(NewVRegIt->second)
+ << '\n');
// Update the user reg.
MOUse.setReg(NewVRegIt->second);
}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 475bb82e5b9c..9df931eb81b3 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -22,96 +22,103 @@
using namespace llvm;
-void MachineIRBuilder::setMF(MachineFunction &MF) {
- this->MF = &MF;
- this->MBB = nullptr;
- this->MRI = &MF.getRegInfo();
- this->TII = MF.getSubtarget().getInstrInfo();
- this->DL = DebugLoc();
- this->II = MachineBasicBlock::iterator();
- this->InsertedInstr = nullptr;
-}
-
-void MachineIRBuilder::setMBB(MachineBasicBlock &MBB) {
- this->MBB = &MBB;
- this->II = MBB.end();
+void MachineIRBuilderBase::setMF(MachineFunction &MF) {
+ State.MF = &MF;
+ State.MBB = nullptr;
+ State.MRI = &MF.getRegInfo();
+ State.TII = MF.getSubtarget().getInstrInfo();
+ State.DL = DebugLoc();
+ State.II = MachineBasicBlock::iterator();
+ State.InsertedInstr = nullptr;
+}
+
+void MachineIRBuilderBase::setMBB(MachineBasicBlock &MBB) {
+ State.MBB = &MBB;
+ State.II = MBB.end();
assert(&getMF() == MBB.getParent() &&
"Basic block is in a different function");
}
-void MachineIRBuilder::setInstr(MachineInstr &MI) {
+void MachineIRBuilderBase::setInstr(MachineInstr &MI) {
assert(MI.getParent() && "Instruction is not part of a basic block");
setMBB(*MI.getParent());
- this->II = MI.getIterator();
+ State.II = MI.getIterator();
}
-void MachineIRBuilder::setInsertPt(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator II) {
+void MachineIRBuilderBase::setInsertPt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator II) {
assert(MBB.getParent() == &getMF() &&
"Basic block is in a different function");
- this->MBB = &MBB;
- this->II = II;
+ State.MBB = &MBB;
+ State.II = II;
}
-void MachineIRBuilder::recordInsertions(
+void MachineIRBuilderBase::recordInsertion(MachineInstr *InsertedInstr) const {
+ if (State.InsertedInstr)
+ State.InsertedInstr(InsertedInstr);
+}
+
+void MachineIRBuilderBase::recordInsertions(
std::function<void(MachineInstr *)> Inserted) {
- InsertedInstr = std::move(Inserted);
+ State.InsertedInstr = std::move(Inserted);
}
-void MachineIRBuilder::stopRecordingInsertions() {
- InsertedInstr = nullptr;
+void MachineIRBuilderBase::stopRecordingInsertions() {
+ State.InsertedInstr = nullptr;
}
//------------------------------------------------------------------------------
// Build instruction variants.
//------------------------------------------------------------------------------
-MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opcode) {
+MachineInstrBuilder MachineIRBuilderBase::buildInstr(unsigned Opcode) {
return insertInstr(buildInstrNoInsert(Opcode));
}
-MachineInstrBuilder MachineIRBuilder::buildInstrNoInsert(unsigned Opcode) {
- MachineInstrBuilder MIB = BuildMI(getMF(), DL, getTII().get(Opcode));
+MachineInstrBuilder MachineIRBuilderBase::buildInstrNoInsert(unsigned Opcode) {
+ MachineInstrBuilder MIB = BuildMI(getMF(), getDL(), getTII().get(Opcode));
return MIB;
}
-
-MachineInstrBuilder MachineIRBuilder::insertInstr(MachineInstrBuilder MIB) {
+MachineInstrBuilder MachineIRBuilderBase::insertInstr(MachineInstrBuilder MIB) {
getMBB().insert(getInsertPt(), MIB);
- if (InsertedInstr)
- InsertedInstr(MIB);
+ recordInsertion(MIB);
return MIB;
}
MachineInstrBuilder
-MachineIRBuilder::buildDirectDbgValue(unsigned Reg, const MDNode *Variable,
- const MDNode *Expr) {
+MachineIRBuilderBase::buildDirectDbgValue(unsigned Reg, const MDNode *Variable,
+ const MDNode *Expr) {
assert(isa<DILocalVariable>(Variable) && "not a variable");
assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
- assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
- "Expected inlined-at fields to agree");
- return insertInstr(BuildMI(getMF(), DL, getTII().get(TargetOpcode::DBG_VALUE),
+ assert(
+ cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) &&
+ "Expected inlined-at fields to agree");
+ return insertInstr(BuildMI(getMF(), getDL(),
+ getTII().get(TargetOpcode::DBG_VALUE),
/*IsIndirect*/ false, Reg, Variable, Expr));
}
-MachineInstrBuilder
-MachineIRBuilder::buildIndirectDbgValue(unsigned Reg, const MDNode *Variable,
- const MDNode *Expr) {
+MachineInstrBuilder MachineIRBuilderBase::buildIndirectDbgValue(
+ unsigned Reg, const MDNode *Variable, const MDNode *Expr) {
assert(isa<DILocalVariable>(Variable) && "not a variable");
assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
- assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
- "Expected inlined-at fields to agree");
- return insertInstr(BuildMI(getMF(), DL, getTII().get(TargetOpcode::DBG_VALUE),
+ assert(
+ cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) &&
+ "Expected inlined-at fields to agree");
+ return insertInstr(BuildMI(getMF(), getDL(),
+ getTII().get(TargetOpcode::DBG_VALUE),
/*IsIndirect*/ true, Reg, Variable, Expr));
}
-MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI,
- const MDNode *Variable,
- const MDNode *Expr) {
+MachineInstrBuilder
+MachineIRBuilderBase::buildFIDbgValue(int FI, const MDNode *Variable,
+ const MDNode *Expr) {
assert(isa<DILocalVariable>(Variable) && "not a variable");
assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
- assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
- "Expected inlined-at fields to agree");
+ assert(
+ cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) &&
+ "Expected inlined-at fields to agree");
return buildInstr(TargetOpcode::DBG_VALUE)
.addFrameIndex(FI)
.addImm(0)
@@ -119,13 +126,13 @@ MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI,
.addMetadata(Expr);
}
-MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,
- const MDNode *Variable,
- const MDNode *Expr) {
+MachineInstrBuilder MachineIRBuilderBase::buildConstDbgValue(
+ const Constant &C, const MDNode *Variable, const MDNode *Expr) {
assert(isa<DILocalVariable>(Variable) && "not a variable");
assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
- assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
- "Expected inlined-at fields to agree");
+ assert(
+ cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) &&
+ "Expected inlined-at fields to agree");
auto MIB = buildInstr(TargetOpcode::DBG_VALUE);
if (auto *CI = dyn_cast<ConstantInt>(&C)) {
if (CI->getBitWidth() > 64)
@@ -142,17 +149,18 @@ MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,
return MIB.addImm(0).addMetadata(Variable).addMetadata(Expr);
}
-MachineInstrBuilder MachineIRBuilder::buildFrameIndex(unsigned Res, int Idx) {
- assert(MRI->getType(Res).isPointer() && "invalid operand type");
+MachineInstrBuilder MachineIRBuilderBase::buildFrameIndex(unsigned Res,
+ int Idx) {
+ assert(getMRI()->getType(Res).isPointer() && "invalid operand type");
return buildInstr(TargetOpcode::G_FRAME_INDEX)
.addDef(Res)
.addFrameIndex(Idx);
}
-MachineInstrBuilder MachineIRBuilder::buildGlobalValue(unsigned Res,
- const GlobalValue *GV) {
- assert(MRI->getType(Res).isPointer() && "invalid operand type");
- assert(MRI->getType(Res).getAddressSpace() ==
+MachineInstrBuilder
+MachineIRBuilderBase::buildGlobalValue(unsigned Res, const GlobalValue *GV) {
+ assert(getMRI()->getType(Res).isPointer() && "invalid operand type");
+ assert(getMRI()->getType(Res).getAddressSpace() ==
GV->getType()->getAddressSpace() &&
"address space mismatch");
@@ -161,29 +169,20 @@ MachineInstrBuilder MachineIRBuilder::buildGlobalValue(unsigned Res,
.addGlobalAddress(GV);
}
-MachineInstrBuilder MachineIRBuilder::buildBinaryOp(unsigned Opcode, unsigned Res, unsigned Op0,
- unsigned Op1) {
- assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) &&
+void MachineIRBuilderBase::validateBinaryOp(unsigned Res, unsigned Op0,
+ unsigned Op1) {
+ assert((getMRI()->getType(Res).isScalar() ||
+ getMRI()->getType(Res).isVector()) &&
"invalid operand type");
- assert(MRI->getType(Res) == MRI->getType(Op0) &&
- MRI->getType(Res) == MRI->getType(Op1) && "type mismatch");
-
- return buildInstr(Opcode)
- .addDef(Res)
- .addUse(Op0)
- .addUse(Op1);
-}
-
-MachineInstrBuilder MachineIRBuilder::buildAdd(unsigned Res, unsigned Op0,
- unsigned Op1) {
- return buildBinaryOp(TargetOpcode::G_ADD, Res, Op0, Op1);
+ assert(getMRI()->getType(Res) == getMRI()->getType(Op0) &&
+ getMRI()->getType(Res) == getMRI()->getType(Op1) && "type mismatch");
}
-MachineInstrBuilder MachineIRBuilder::buildGEP(unsigned Res, unsigned Op0,
- unsigned Op1) {
- assert(MRI->getType(Res).isPointer() &&
- MRI->getType(Res) == MRI->getType(Op0) && "type mismatch");
- assert(MRI->getType(Op1).isScalar() && "invalid offset type");
+MachineInstrBuilder MachineIRBuilderBase::buildGEP(unsigned Res, unsigned Op0,
+ unsigned Op1) {
+ assert(getMRI()->getType(Res).isPointer() &&
+ getMRI()->getType(Res) == getMRI()->getType(Op0) && "type mismatch");
+ assert(getMRI()->getType(Op1).isScalar() && "invalid offset type");
return buildInstr(TargetOpcode::G_GEP)
.addDef(Res)
@@ -192,8 +191,8 @@ MachineInstrBuilder MachineIRBuilder::buildGEP(unsigned Res, unsigned Op0,
}
Optional<MachineInstrBuilder>
-MachineIRBuilder::materializeGEP(unsigned &Res, unsigned Op0,
- const LLT &ValueTy, uint64_t Value) {
+MachineIRBuilderBase::materializeGEP(unsigned &Res, unsigned Op0,
+ const LLT &ValueTy, uint64_t Value) {
assert(Res == 0 && "Res is a result argument");
assert(ValueTy.isScalar() && "invalid offset type");
@@ -202,17 +201,18 @@ MachineIRBuilder::materializeGEP(unsigned &Res, unsigned Op0,
return None;
}
- Res = MRI->createGenericVirtualRegister(MRI->getType(Op0));
- unsigned TmpReg = MRI->createGenericVirtualRegister(ValueTy);
+ Res = getMRI()->createGenericVirtualRegister(getMRI()->getType(Op0));
+ unsigned TmpReg = getMRI()->createGenericVirtualRegister(ValueTy);
buildConstant(TmpReg, Value);
return buildGEP(Res, Op0, TmpReg);
}
-MachineInstrBuilder MachineIRBuilder::buildPtrMask(unsigned Res, unsigned Op0,
- uint32_t NumBits) {
- assert(MRI->getType(Res).isPointer() &&
- MRI->getType(Res) == MRI->getType(Op0) && "type mismatch");
+MachineInstrBuilder MachineIRBuilderBase::buildPtrMask(unsigned Res,
+ unsigned Op0,
+ uint32_t NumBits) {
+ assert(getMRI()->getType(Res).isPointer() &&
+ getMRI()->getType(Res) == getMRI()->getType(Op0) && "type mismatch");
return buildInstr(TargetOpcode::G_PTR_MASK)
.addDef(Res)
@@ -220,92 +220,88 @@ MachineInstrBuilder MachineIRBuilder::buildPtrMask(unsigned Res, unsigned Op0,
.addImm(NumBits);
}
-MachineInstrBuilder MachineIRBuilder::buildSub(unsigned Res, unsigned Op0,
- unsigned Op1) {
- return buildBinaryOp(TargetOpcode::G_SUB, Res, Op0, Op1);
-}
-
-MachineInstrBuilder MachineIRBuilder::buildMul(unsigned Res, unsigned Op0,
- unsigned Op1) {
- return buildBinaryOp(TargetOpcode::G_MUL, Res, Op0, Op1);
-}
-
-MachineInstrBuilder MachineIRBuilder::buildAnd(unsigned Res, unsigned Op0,
- unsigned Op1) {
- return buildBinaryOp(TargetOpcode::G_AND, Res, Op0, Op1);
-}
-
-MachineInstrBuilder MachineIRBuilder::buildOr(unsigned Res, unsigned Op0,
- unsigned Op1) {
- return buildBinaryOp(TargetOpcode::G_OR, Res, Op0, Op1);
-}
-
-MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) {
+MachineInstrBuilder MachineIRBuilderBase::buildBr(MachineBasicBlock &Dest) {
return buildInstr(TargetOpcode::G_BR).addMBB(&Dest);
}
-MachineInstrBuilder MachineIRBuilder::buildBrIndirect(unsigned Tgt) {
- assert(MRI->getType(Tgt).isPointer() && "invalid branch destination");
+MachineInstrBuilder MachineIRBuilderBase::buildBrIndirect(unsigned Tgt) {
+ assert(getMRI()->getType(Tgt).isPointer() && "invalid branch destination");
return buildInstr(TargetOpcode::G_BRINDIRECT).addUse(Tgt);
}
-MachineInstrBuilder MachineIRBuilder::buildCopy(unsigned Res, unsigned Op) {
- assert(MRI->getType(Res) == LLT() || MRI->getType(Op) == LLT() ||
- MRI->getType(Res) == MRI->getType(Op));
+MachineInstrBuilder MachineIRBuilderBase::buildCopy(unsigned Res, unsigned Op) {
+ assert(getMRI()->getType(Res) == LLT() || getMRI()->getType(Op) == LLT() ||
+ getMRI()->getType(Res) == getMRI()->getType(Op));
return buildInstr(TargetOpcode::COPY).addDef(Res).addUse(Op);
}
-MachineInstrBuilder MachineIRBuilder::buildConstant(unsigned Res,
- const ConstantInt &Val) {
- LLT Ty = MRI->getType(Res);
+MachineInstrBuilder
+MachineIRBuilderBase::buildConstant(unsigned Res, const ConstantInt &Val) {
+ LLT Ty = getMRI()->getType(Res);
assert((Ty.isScalar() || Ty.isPointer()) && "invalid operand type");
const ConstantInt *NewVal = &Val;
if (Ty.getSizeInBits() != Val.getBitWidth())
- NewVal = ConstantInt::get(MF->getFunction().getContext(),
+ NewVal = ConstantInt::get(getMF().getFunction().getContext(),
Val.getValue().sextOrTrunc(Ty.getSizeInBits()));
return buildInstr(TargetOpcode::G_CONSTANT).addDef(Res).addCImm(NewVal);
}
-MachineInstrBuilder MachineIRBuilder::buildConstant(unsigned Res,
- int64_t Val) {
- auto IntN = IntegerType::get(MF->getFunction().getContext(),
- MRI->getType(Res).getSizeInBits());
+MachineInstrBuilder MachineIRBuilderBase::buildConstant(unsigned Res,
+ int64_t Val) {
+ auto IntN = IntegerType::get(getMF().getFunction().getContext(),
+ getMRI()->getType(Res).getSizeInBits());
ConstantInt *CI = ConstantInt::get(IntN, Val, true);
return buildConstant(Res, *CI);
}
-MachineInstrBuilder MachineIRBuilder::buildFConstant(unsigned Res,
- const ConstantFP &Val) {
- assert(MRI->getType(Res).isScalar() && "invalid operand type");
+MachineInstrBuilder
+MachineIRBuilderBase::buildFConstant(unsigned Res, const ConstantFP &Val) {
+ assert(getMRI()->getType(Res).isScalar() && "invalid operand type");
return buildInstr(TargetOpcode::G_FCONSTANT).addDef(Res).addFPImm(&Val);
}
-MachineInstrBuilder MachineIRBuilder::buildBrCond(unsigned Tst,
- MachineBasicBlock &Dest) {
- assert(MRI->getType(Tst).isScalar() && "invalid operand type");
+MachineInstrBuilder MachineIRBuilderBase::buildFConstant(unsigned Res,
+ double Val) {
+ LLT DstTy = getMRI()->getType(Res);
+ auto &Ctx = getMF().getFunction().getContext();
+ auto *CFP =
+ ConstantFP::get(Ctx, getAPFloatFromSize(Val, DstTy.getSizeInBits()));
+ return buildFConstant(Res, *CFP);
+}
+
+MachineInstrBuilder MachineIRBuilderBase::buildBrCond(unsigned Tst,
+ MachineBasicBlock &Dest) {
+ assert(getMRI()->getType(Tst).isScalar() && "invalid operand type");
return buildInstr(TargetOpcode::G_BRCOND).addUse(Tst).addMBB(&Dest);
}
-MachineInstrBuilder MachineIRBuilder::buildLoad(unsigned Res, unsigned Addr,
- MachineMemOperand &MMO) {
- assert(MRI->getType(Res).isValid() && "invalid operand type");
- assert(MRI->getType(Addr).isPointer() && "invalid operand type");
+MachineInstrBuilder MachineIRBuilderBase::buildLoad(unsigned Res, unsigned Addr,
+ MachineMemOperand &MMO) {
+ return buildLoadInstr(TargetOpcode::G_LOAD, Res, Addr, MMO);
+}
+
+MachineInstrBuilder
+MachineIRBuilderBase::buildLoadInstr(unsigned Opcode, unsigned Res,
+ unsigned Addr, MachineMemOperand &MMO) {
+ assert(getMRI()->getType(Res).isValid() && "invalid operand type");
+ assert(getMRI()->getType(Addr).isPointer() && "invalid operand type");
- return buildInstr(TargetOpcode::G_LOAD)
+ return buildInstr(Opcode)
.addDef(Res)
.addUse(Addr)
.addMemOperand(&MMO);
}
-MachineInstrBuilder MachineIRBuilder::buildStore(unsigned Val, unsigned Addr,
- MachineMemOperand &MMO) {
- assert(MRI->getType(Val).isValid() && "invalid operand type");
- assert(MRI->getType(Addr).isPointer() && "invalid operand type");
+MachineInstrBuilder MachineIRBuilderBase::buildStore(unsigned Val,
+ unsigned Addr,
+ MachineMemOperand &MMO) {
+ assert(getMRI()->getType(Val).isValid() && "invalid operand type");
+ assert(getMRI()->getType(Addr).isPointer() && "invalid operand type");
return buildInstr(TargetOpcode::G_STORE)
.addUse(Val)
@@ -313,15 +309,16 @@ MachineInstrBuilder MachineIRBuilder::buildStore(unsigned Val, unsigned Addr,
.addMemOperand(&MMO);
}
-MachineInstrBuilder MachineIRBuilder::buildUAdde(unsigned Res,
- unsigned CarryOut,
- unsigned Op0, unsigned Op1,
- unsigned CarryIn) {
- assert(MRI->getType(Res).isScalar() && "invalid operand type");
- assert(MRI->getType(Res) == MRI->getType(Op0) &&
- MRI->getType(Res) == MRI->getType(Op1) && "type mismatch");
- assert(MRI->getType(CarryOut).isScalar() && "invalid operand type");
- assert(MRI->getType(CarryOut) == MRI->getType(CarryIn) && "type mismatch");
+MachineInstrBuilder MachineIRBuilderBase::buildUAdde(unsigned Res,
+ unsigned CarryOut,
+ unsigned Op0, unsigned Op1,
+ unsigned CarryIn) {
+ assert(getMRI()->getType(Res).isScalar() && "invalid operand type");
+ assert(getMRI()->getType(Res) == getMRI()->getType(Op0) &&
+ getMRI()->getType(Res) == getMRI()->getType(Op1) && "type mismatch");
+ assert(getMRI()->getType(CarryOut).isScalar() && "invalid operand type");
+ assert(getMRI()->getType(CarryOut) == getMRI()->getType(CarryIn) &&
+ "type mismatch");
return buildInstr(TargetOpcode::G_UADDE)
.addDef(Res)
@@ -331,58 +328,64 @@ MachineInstrBuilder MachineIRBuilder::buildUAdde(unsigned Res,
.addUse(CarryIn);
}
-MachineInstrBuilder MachineIRBuilder::buildAnyExt(unsigned Res, unsigned Op) {
+MachineInstrBuilder MachineIRBuilderBase::buildAnyExt(unsigned Res,
+ unsigned Op) {
validateTruncExt(Res, Op, true);
return buildInstr(TargetOpcode::G_ANYEXT).addDef(Res).addUse(Op);
}
-MachineInstrBuilder MachineIRBuilder::buildSExt(unsigned Res, unsigned Op) {
+MachineInstrBuilder MachineIRBuilderBase::buildSExt(unsigned Res, unsigned Op) {
validateTruncExt(Res, Op, true);
return buildInstr(TargetOpcode::G_SEXT).addDef(Res).addUse(Op);
}
-MachineInstrBuilder MachineIRBuilder::buildZExt(unsigned Res, unsigned Op) {
+MachineInstrBuilder MachineIRBuilderBase::buildZExt(unsigned Res, unsigned Op) {
validateTruncExt(Res, Op, true);
return buildInstr(TargetOpcode::G_ZEXT).addDef(Res).addUse(Op);
}
-MachineInstrBuilder
-MachineIRBuilder::buildExtOrTrunc(unsigned ExtOpc, unsigned Res, unsigned Op) {
+MachineInstrBuilder MachineIRBuilderBase::buildExtOrTrunc(unsigned ExtOpc,
+ unsigned Res,
+ unsigned Op) {
assert((TargetOpcode::G_ANYEXT == ExtOpc || TargetOpcode::G_ZEXT == ExtOpc ||
TargetOpcode::G_SEXT == ExtOpc) &&
"Expecting Extending Opc");
- assert(MRI->getType(Res).isScalar() || MRI->getType(Res).isVector());
- assert(MRI->getType(Res).isScalar() == MRI->getType(Op).isScalar());
+ assert(getMRI()->getType(Res).isScalar() ||
+ getMRI()->getType(Res).isVector());
+ assert(getMRI()->getType(Res).isScalar() == getMRI()->getType(Op).isScalar());
unsigned Opcode = TargetOpcode::COPY;
- if (MRI->getType(Res).getSizeInBits() > MRI->getType(Op).getSizeInBits())
+ if (getMRI()->getType(Res).getSizeInBits() >
+ getMRI()->getType(Op).getSizeInBits())
Opcode = ExtOpc;
- else if (MRI->getType(Res).getSizeInBits() < MRI->getType(Op).getSizeInBits())
+ else if (getMRI()->getType(Res).getSizeInBits() <
+ getMRI()->getType(Op).getSizeInBits())
Opcode = TargetOpcode::G_TRUNC;
else
- assert(MRI->getType(Res) == MRI->getType(Op));
+ assert(getMRI()->getType(Res) == getMRI()->getType(Op));
return buildInstr(Opcode).addDef(Res).addUse(Op);
}
-MachineInstrBuilder MachineIRBuilder::buildSExtOrTrunc(unsigned Res,
- unsigned Op) {
+MachineInstrBuilder MachineIRBuilderBase::buildSExtOrTrunc(unsigned Res,
+ unsigned Op) {
return buildExtOrTrunc(TargetOpcode::G_SEXT, Res, Op);
}
-MachineInstrBuilder MachineIRBuilder::buildZExtOrTrunc(unsigned Res,
- unsigned Op) {
+MachineInstrBuilder MachineIRBuilderBase::buildZExtOrTrunc(unsigned Res,
+ unsigned Op) {
return buildExtOrTrunc(TargetOpcode::G_ZEXT, Res, Op);
}
-MachineInstrBuilder MachineIRBuilder::buildAnyExtOrTrunc(unsigned Res,
- unsigned Op) {
+MachineInstrBuilder MachineIRBuilderBase::buildAnyExtOrTrunc(unsigned Res,
+ unsigned Op) {
return buildExtOrTrunc(TargetOpcode::G_ANYEXT, Res, Op);
}
-MachineInstrBuilder MachineIRBuilder::buildCast(unsigned Dst, unsigned Src) {
- LLT SrcTy = MRI->getType(Src);
- LLT DstTy = MRI->getType(Dst);
+MachineInstrBuilder MachineIRBuilderBase::buildCast(unsigned Dst,
+ unsigned Src) {
+ LLT SrcTy = getMRI()->getType(Src);
+ LLT DstTy = getMRI()->getType(Dst);
if (SrcTy == DstTy)
return buildCopy(Dst, Src);
@@ -399,17 +402,18 @@ MachineInstrBuilder MachineIRBuilder::buildCast(unsigned Dst, unsigned Src) {
return buildInstr(Opcode).addDef(Dst).addUse(Src);
}
-MachineInstrBuilder MachineIRBuilder::buildExtract(unsigned Res, unsigned Src,
- uint64_t Index) {
+MachineInstrBuilder
+MachineIRBuilderBase::buildExtract(unsigned Res, unsigned Src, uint64_t Index) {
#ifndef NDEBUG
- assert(MRI->getType(Src).isValid() && "invalid operand type");
- assert(MRI->getType(Res).isValid() && "invalid operand type");
- assert(Index + MRI->getType(Res).getSizeInBits() <=
- MRI->getType(Src).getSizeInBits() &&
+ assert(getMRI()->getType(Src).isValid() && "invalid operand type");
+ assert(getMRI()->getType(Res).isValid() && "invalid operand type");
+ assert(Index + getMRI()->getType(Res).getSizeInBits() <=
+ getMRI()->getType(Src).getSizeInBits() &&
"extracting off end of register");
#endif
- if (MRI->getType(Res).getSizeInBits() == MRI->getType(Src).getSizeInBits()) {
+ if (getMRI()->getType(Res).getSizeInBits() ==
+ getMRI()->getType(Src).getSizeInBits()) {
assert(Index == 0 && "insertion past the end of a register");
return buildCast(Res, Src);
}
@@ -420,25 +424,25 @@ MachineInstrBuilder MachineIRBuilder::buildExtract(unsigned Res, unsigned Src,
.addImm(Index);
}
-void MachineIRBuilder::buildSequence(unsigned Res, ArrayRef<unsigned> Ops,
- ArrayRef<uint64_t> Indices) {
+void MachineIRBuilderBase::buildSequence(unsigned Res, ArrayRef<unsigned> Ops,
+ ArrayRef<uint64_t> Indices) {
#ifndef NDEBUG
assert(Ops.size() == Indices.size() && "incompatible args");
assert(!Ops.empty() && "invalid trivial sequence");
assert(std::is_sorted(Indices.begin(), Indices.end()) &&
"sequence offsets must be in ascending order");
- assert(MRI->getType(Res).isValid() && "invalid operand type");
+ assert(getMRI()->getType(Res).isValid() && "invalid operand type");
for (auto Op : Ops)
- assert(MRI->getType(Op).isValid() && "invalid operand type");
+ assert(getMRI()->getType(Op).isValid() && "invalid operand type");
#endif
- LLT ResTy = MRI->getType(Res);
- LLT OpTy = MRI->getType(Ops[0]);
+ LLT ResTy = getMRI()->getType(Res);
+ LLT OpTy = getMRI()->getType(Ops[0]);
unsigned OpSize = OpTy.getSizeInBits();
bool MaybeMerge = true;
for (unsigned i = 0; i < Ops.size(); ++i) {
- if (MRI->getType(Ops[i]) != OpTy || Indices[i] != i * OpSize) {
+ if (getMRI()->getType(Ops[i]) != OpTy || Indices[i] != i * OpSize) {
MaybeMerge = false;
break;
}
@@ -449,31 +453,32 @@ void MachineIRBuilder::buildSequence(unsigned Res, ArrayRef<unsigned> Ops,
return;
}
- unsigned ResIn = MRI->createGenericVirtualRegister(ResTy);
+ unsigned ResIn = getMRI()->createGenericVirtualRegister(ResTy);
buildUndef(ResIn);
for (unsigned i = 0; i < Ops.size(); ++i) {
- unsigned ResOut =
- i + 1 == Ops.size() ? Res : MRI->createGenericVirtualRegister(ResTy);
+ unsigned ResOut = i + 1 == Ops.size()
+ ? Res
+ : getMRI()->createGenericVirtualRegister(ResTy);
buildInsert(ResOut, ResIn, Ops[i], Indices[i]);
ResIn = ResOut;
}
}
-MachineInstrBuilder MachineIRBuilder::buildUndef(unsigned Res) {
+MachineInstrBuilder MachineIRBuilderBase::buildUndef(unsigned Res) {
return buildInstr(TargetOpcode::G_IMPLICIT_DEF).addDef(Res);
}
-MachineInstrBuilder MachineIRBuilder::buildMerge(unsigned Res,
- ArrayRef<unsigned> Ops) {
+MachineInstrBuilder MachineIRBuilderBase::buildMerge(unsigned Res,
+ ArrayRef<unsigned> Ops) {
#ifndef NDEBUG
assert(!Ops.empty() && "invalid trivial sequence");
- LLT Ty = MRI->getType(Ops[0]);
+ LLT Ty = getMRI()->getType(Ops[0]);
for (auto Reg : Ops)
- assert(MRI->getType(Reg) == Ty && "type mismatch in input list");
- assert(Ops.size() * MRI->getType(Ops[0]).getSizeInBits() ==
- MRI->getType(Res).getSizeInBits() &&
+ assert(getMRI()->getType(Reg) == Ty && "type mismatch in input list");
+ assert(Ops.size() * getMRI()->getType(Ops[0]).getSizeInBits() ==
+ getMRI()->getType(Res).getSizeInBits() &&
"input operands do not cover output register");
#endif
@@ -487,16 +492,16 @@ MachineInstrBuilder MachineIRBuilder::buildMerge(unsigned Res,
return MIB;
}
-MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<unsigned> Res,
- unsigned Op) {
+MachineInstrBuilder MachineIRBuilderBase::buildUnmerge(ArrayRef<unsigned> Res,
+ unsigned Op) {
#ifndef NDEBUG
assert(!Res.empty() && "invalid trivial sequence");
- LLT Ty = MRI->getType(Res[0]);
+ LLT Ty = getMRI()->getType(Res[0]);
for (auto Reg : Res)
- assert(MRI->getType(Reg) == Ty && "type mismatch in input list");
- assert(Res.size() * MRI->getType(Res[0]).getSizeInBits() ==
- MRI->getType(Op).getSizeInBits() &&
+ assert(getMRI()->getType(Reg) == Ty && "type mismatch in input list");
+ assert(Res.size() * getMRI()->getType(Res[0]).getSizeInBits() ==
+ getMRI()->getType(Op).getSizeInBits() &&
"input operands do not cover output register");
#endif
@@ -507,13 +512,15 @@ MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<unsigned> Res,
return MIB;
}
-MachineInstrBuilder MachineIRBuilder::buildInsert(unsigned Res, unsigned Src,
- unsigned Op, unsigned Index) {
- assert(Index + MRI->getType(Op).getSizeInBits() <=
- MRI->getType(Res).getSizeInBits() &&
+MachineInstrBuilder MachineIRBuilderBase::buildInsert(unsigned Res,
+ unsigned Src, unsigned Op,
+ unsigned Index) {
+ assert(Index + getMRI()->getType(Op).getSizeInBits() <=
+ getMRI()->getType(Res).getSizeInBits() &&
"insertion past the end of a register");
- if (MRI->getType(Res).getSizeInBits() == MRI->getType(Op).getSizeInBits()) {
+ if (getMRI()->getType(Res).getSizeInBits() ==
+ getMRI()->getType(Op).getSizeInBits()) {
return buildCast(Res, Op);
}
@@ -524,9 +531,9 @@ MachineInstrBuilder MachineIRBuilder::buildInsert(unsigned Res, unsigned Src,
.addImm(Index);
}
-MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
- unsigned Res,
- bool HasSideEffects) {
+MachineInstrBuilder MachineIRBuilderBase::buildIntrinsic(Intrinsic::ID ID,
+ unsigned Res,
+ bool HasSideEffects) {
auto MIB =
buildInstr(HasSideEffects ? TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
: TargetOpcode::G_INTRINSIC);
@@ -536,28 +543,30 @@ MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
return MIB;
}
-MachineInstrBuilder MachineIRBuilder::buildTrunc(unsigned Res, unsigned Op) {
+MachineInstrBuilder MachineIRBuilderBase::buildTrunc(unsigned Res,
+ unsigned Op) {
validateTruncExt(Res, Op, false);
return buildInstr(TargetOpcode::G_TRUNC).addDef(Res).addUse(Op);
}
-MachineInstrBuilder MachineIRBuilder::buildFPTrunc(unsigned Res, unsigned Op) {
+MachineInstrBuilder MachineIRBuilderBase::buildFPTrunc(unsigned Res,
+ unsigned Op) {
validateTruncExt(Res, Op, false);
return buildInstr(TargetOpcode::G_FPTRUNC).addDef(Res).addUse(Op);
}
-MachineInstrBuilder MachineIRBuilder::buildICmp(CmpInst::Predicate Pred,
- unsigned Res, unsigned Op0,
- unsigned Op1) {
+MachineInstrBuilder MachineIRBuilderBase::buildICmp(CmpInst::Predicate Pred,
+ unsigned Res, unsigned Op0,
+ unsigned Op1) {
#ifndef NDEBUG
- assert(MRI->getType(Op0) == MRI->getType(Op0) && "type mismatch");
+ assert(getMRI()->getType(Op0) == getMRI()->getType(Op0) && "type mismatch");
assert(CmpInst::isIntPredicate(Pred) && "invalid predicate");
- if (MRI->getType(Op0).isScalar() || MRI->getType(Op0).isPointer())
- assert(MRI->getType(Res).isScalar() && "type mismatch");
+ if (getMRI()->getType(Op0).isScalar() || getMRI()->getType(Op0).isPointer())
+ assert(getMRI()->getType(Res).isScalar() && "type mismatch");
else
- assert(MRI->getType(Res).isVector() &&
- MRI->getType(Res).getNumElements() ==
- MRI->getType(Op0).getNumElements() &&
+ assert(getMRI()->getType(Res).isVector() &&
+ getMRI()->getType(Res).getNumElements() ==
+ getMRI()->getType(Op0).getNumElements() &&
"type mismatch");
#endif
@@ -568,20 +577,21 @@ MachineInstrBuilder MachineIRBuilder::buildICmp(CmpInst::Predicate Pred,
.addUse(Op1);
}
-MachineInstrBuilder MachineIRBuilder::buildFCmp(CmpInst::Predicate Pred,
- unsigned Res, unsigned Op0,
- unsigned Op1) {
+MachineInstrBuilder MachineIRBuilderBase::buildFCmp(CmpInst::Predicate Pred,
+ unsigned Res, unsigned Op0,
+ unsigned Op1) {
#ifndef NDEBUG
- assert((MRI->getType(Op0).isScalar() || MRI->getType(Op0).isVector()) &&
+ assert((getMRI()->getType(Op0).isScalar() ||
+ getMRI()->getType(Op0).isVector()) &&
"invalid operand type");
- assert(MRI->getType(Op0) == MRI->getType(Op1) && "type mismatch");
+ assert(getMRI()->getType(Op0) == getMRI()->getType(Op1) && "type mismatch");
assert(CmpInst::isFPPredicate(Pred) && "invalid predicate");
- if (MRI->getType(Op0).isScalar())
- assert(MRI->getType(Res).isScalar() && "type mismatch");
+ if (getMRI()->getType(Op0).isScalar())
+ assert(getMRI()->getType(Res).isScalar() && "type mismatch");
else
- assert(MRI->getType(Res).isVector() &&
- MRI->getType(Res).getNumElements() ==
- MRI->getType(Op0).getNumElements() &&
+ assert(getMRI()->getType(Res).isVector() &&
+ getMRI()->getType(Res).getNumElements() ==
+ getMRI()->getType(Op0).getNumElements() &&
"type mismatch");
#endif
@@ -592,21 +602,23 @@ MachineInstrBuilder MachineIRBuilder::buildFCmp(CmpInst::Predicate Pred,
.addUse(Op1);
}
-MachineInstrBuilder MachineIRBuilder::buildSelect(unsigned Res, unsigned Tst,
- unsigned Op0, unsigned Op1) {
+MachineInstrBuilder MachineIRBuilderBase::buildSelect(unsigned Res,
+ unsigned Tst,
+ unsigned Op0,
+ unsigned Op1) {
#ifndef NDEBUG
- LLT ResTy = MRI->getType(Res);
+ LLT ResTy = getMRI()->getType(Res);
assert((ResTy.isScalar() || ResTy.isVector() || ResTy.isPointer()) &&
"invalid operand type");
- assert(ResTy == MRI->getType(Op0) && ResTy == MRI->getType(Op1) &&
+ assert(ResTy == getMRI()->getType(Op0) && ResTy == getMRI()->getType(Op1) &&
"type mismatch");
if (ResTy.isScalar() || ResTy.isPointer())
- assert(MRI->getType(Tst).isScalar() && "type mismatch");
+ assert(getMRI()->getType(Tst).isScalar() && "type mismatch");
else
- assert((MRI->getType(Tst).isScalar() ||
- (MRI->getType(Tst).isVector() &&
- MRI->getType(Tst).getNumElements() ==
- MRI->getType(Op0).getNumElements())) &&
+ assert((getMRI()->getType(Tst).isScalar() ||
+ (getMRI()->getType(Tst).isVector() &&
+ getMRI()->getType(Tst).getNumElements() ==
+ getMRI()->getType(Op0).getNumElements())) &&
"type mismatch");
#endif
@@ -617,15 +629,14 @@ MachineInstrBuilder MachineIRBuilder::buildSelect(unsigned Res, unsigned Tst,
.addUse(Op1);
}
-MachineInstrBuilder MachineIRBuilder::buildInsertVectorElement(unsigned Res,
- unsigned Val,
- unsigned Elt,
- unsigned Idx) {
+MachineInstrBuilder
+MachineIRBuilderBase::buildInsertVectorElement(unsigned Res, unsigned Val,
+ unsigned Elt, unsigned Idx) {
#ifndef NDEBUG
- LLT ResTy = MRI->getType(Res);
- LLT ValTy = MRI->getType(Val);
- LLT EltTy = MRI->getType(Elt);
- LLT IdxTy = MRI->getType(Idx);
+ LLT ResTy = getMRI()->getType(Res);
+ LLT ValTy = getMRI()->getType(Val);
+ LLT EltTy = getMRI()->getType(Elt);
+ LLT IdxTy = getMRI()->getType(Idx);
assert(ResTy.isVector() && ValTy.isVector() && "invalid operand type");
assert(IdxTy.isScalar() && "invalid operand type");
assert(ResTy.getNumElements() == ValTy.getNumElements() && "type mismatch");
@@ -639,13 +650,13 @@ MachineInstrBuilder MachineIRBuilder::buildInsertVectorElement(unsigned Res,
.addUse(Idx);
}
-MachineInstrBuilder MachineIRBuilder::buildExtractVectorElement(unsigned Res,
- unsigned Val,
- unsigned Idx) {
+MachineInstrBuilder
+MachineIRBuilderBase::buildExtractVectorElement(unsigned Res, unsigned Val,
+ unsigned Idx) {
#ifndef NDEBUG
- LLT ResTy = MRI->getType(Res);
- LLT ValTy = MRI->getType(Val);
- LLT IdxTy = MRI->getType(Idx);
+ LLT ResTy = getMRI()->getType(Res);
+ LLT ValTy = getMRI()->getType(Val);
+ LLT IdxTy = getMRI()->getType(Idx);
assert(ValTy.isVector() && "invalid operand type");
assert((ResTy.isScalar() || ResTy.isPointer()) && "invalid operand type");
assert(IdxTy.isScalar() && "invalid operand type");
@@ -658,15 +669,42 @@ MachineInstrBuilder MachineIRBuilder::buildExtractVectorElement(unsigned Res,
.addUse(Idx);
}
+MachineInstrBuilder MachineIRBuilderBase::buildAtomicCmpXchgWithSuccess(
+ unsigned OldValRes, unsigned SuccessRes, unsigned Addr, unsigned CmpVal,
+ unsigned NewVal, MachineMemOperand &MMO) {
+#ifndef NDEBUG
+ LLT OldValResTy = getMRI()->getType(OldValRes);
+ LLT SuccessResTy = getMRI()->getType(SuccessRes);
+ LLT AddrTy = getMRI()->getType(Addr);
+ LLT CmpValTy = getMRI()->getType(CmpVal);
+ LLT NewValTy = getMRI()->getType(NewVal);
+ assert(OldValResTy.isScalar() && "invalid operand type");
+ assert(SuccessResTy.isScalar() && "invalid operand type");
+ assert(AddrTy.isPointer() && "invalid operand type");
+ assert(CmpValTy.isValid() && "invalid operand type");
+ assert(NewValTy.isValid() && "invalid operand type");
+ assert(OldValResTy == CmpValTy && "type mismatch");
+ assert(OldValResTy == NewValTy && "type mismatch");
+#endif
+
+ return buildInstr(TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS)
+ .addDef(OldValRes)
+ .addDef(SuccessRes)
+ .addUse(Addr)
+ .addUse(CmpVal)
+ .addUse(NewVal)
+ .addMemOperand(&MMO);
+}
+
MachineInstrBuilder
-MachineIRBuilder::buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr,
- unsigned CmpVal, unsigned NewVal,
- MachineMemOperand &MMO) {
+MachineIRBuilderBase::buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr,
+ unsigned CmpVal, unsigned NewVal,
+ MachineMemOperand &MMO) {
#ifndef NDEBUG
- LLT OldValResTy = MRI->getType(OldValRes);
- LLT AddrTy = MRI->getType(Addr);
- LLT CmpValTy = MRI->getType(CmpVal);
- LLT NewValTy = MRI->getType(NewVal);
+ LLT OldValResTy = getMRI()->getType(OldValRes);
+ LLT AddrTy = getMRI()->getType(Addr);
+ LLT CmpValTy = getMRI()->getType(CmpVal);
+ LLT NewValTy = getMRI()->getType(NewVal);
assert(OldValResTy.isScalar() && "invalid operand type");
assert(AddrTy.isPointer() && "invalid operand type");
assert(CmpValTy.isValid() && "invalid operand type");
@@ -683,14 +721,102 @@ MachineIRBuilder::buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr,
.addMemOperand(&MMO);
}
-void MachineIRBuilder::validateTruncExt(unsigned Dst, unsigned Src,
- bool IsExtend) {
+MachineInstrBuilder
+MachineIRBuilderBase::buildAtomicRMW(unsigned Opcode, unsigned OldValRes,
+ unsigned Addr, unsigned Val,
+ MachineMemOperand &MMO) {
+#ifndef NDEBUG
+ LLT OldValResTy = getMRI()->getType(OldValRes);
+ LLT AddrTy = getMRI()->getType(Addr);
+ LLT ValTy = getMRI()->getType(Val);
+ assert(OldValResTy.isScalar() && "invalid operand type");
+ assert(AddrTy.isPointer() && "invalid operand type");
+ assert(ValTy.isValid() && "invalid operand type");
+ assert(OldValResTy == ValTy && "type mismatch");
+#endif
+
+ return buildInstr(Opcode)
+ .addDef(OldValRes)
+ .addUse(Addr)
+ .addUse(Val)
+ .addMemOperand(&MMO);
+}
+
+MachineInstrBuilder
+MachineIRBuilderBase::buildAtomicRMWXchg(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_XCHG, OldValRes, Addr, Val,
+ MMO);
+}
+MachineInstrBuilder
+MachineIRBuilderBase::buildAtomicRMWAdd(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_ADD, OldValRes, Addr, Val,
+ MMO);
+}
+MachineInstrBuilder
+MachineIRBuilderBase::buildAtomicRMWSub(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_SUB, OldValRes, Addr, Val,
+ MMO);
+}
+MachineInstrBuilder
+MachineIRBuilderBase::buildAtomicRMWAnd(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_AND, OldValRes, Addr, Val,
+ MMO);
+}
+MachineInstrBuilder
+MachineIRBuilderBase::buildAtomicRMWNand(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_NAND, OldValRes, Addr, Val,
+ MMO);
+}
+MachineInstrBuilder
+MachineIRBuilderBase::buildAtomicRMWOr(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_OR, OldValRes, Addr, Val,
+ MMO);
+}
+MachineInstrBuilder
+MachineIRBuilderBase::buildAtomicRMWXor(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_XOR, OldValRes, Addr, Val,
+ MMO);
+}
+MachineInstrBuilder
+MachineIRBuilderBase::buildAtomicRMWMax(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_MAX, OldValRes, Addr, Val,
+ MMO);
+}
+MachineInstrBuilder
+MachineIRBuilderBase::buildAtomicRMWMin(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_MIN, OldValRes, Addr, Val,
+ MMO);
+}
+MachineInstrBuilder
+MachineIRBuilderBase::buildAtomicRMWUmax(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_UMAX, OldValRes, Addr, Val,
+ MMO);
+}
+MachineInstrBuilder
+MachineIRBuilderBase::buildAtomicRMWUmin(unsigned OldValRes, unsigned Addr,
+ unsigned Val, MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_UMIN, OldValRes, Addr, Val,
+ MMO);
+}
+
+void MachineIRBuilderBase::validateTruncExt(unsigned Dst, unsigned Src,
+ bool IsExtend) {
#ifndef NDEBUG
- LLT SrcTy = MRI->getType(Src);
- LLT DstTy = MRI->getType(Dst);
+ LLT SrcTy = getMRI()->getType(Src);
+ LLT DstTy = getMRI()->getType(Dst);
if (DstTy.isVector()) {
- assert(SrcTy.isVector() && "mismatched cast between vecot and non-vector");
+ assert(SrcTy.isVector() && "mismatched cast between vector and non-vector");
assert(SrcTy.getNumElements() == DstTy.getNumElements() &&
"different number of elements in a trunc/ext");
} else
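
For orientation, a rough usage sketch of two of the helpers added above, buildFConstant(Res, double) and the G_ATOMICRMW_* builders; the registers, types, and memory operand are illustrative assumptions, not code from this change.

    // Assuming a MachineIRBuilder MIRBuilder, a MachineRegisterInfo MRI, a
    // pointer vreg Addr, a value vreg Val, and a MachineMemOperand *MMO:
    unsigned OldVal = MRI.createGenericVirtualRegister(LLT::scalar(32));
    MIRBuilder.buildAtomicRMWAdd(OldVal, Addr, Val, *MMO); // emits G_ATOMICRMW_ADD
    unsigned FPOne = MRI.createGenericVirtualRegister(LLT::scalar(64));
    MIRBuilder.buildFConstant(FPOne, 1.0); // emits G_FCONSTANT via getAPFloatFromSize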
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index 006c9ea23034..9e2d48d1dc42 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -30,6 +30,7 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
@@ -75,7 +76,7 @@ RegBankSelect::RegBankSelect(Mode RunningMode)
if (RegBankSelectMode.getNumOccurrences() != 0) {
OptMode = RegBankSelectMode;
if (RegBankSelectMode != RunningMode)
-      DEBUG(dbgs() << "RegBankSelect mode overridden by command line\n");
+      LLVM_DEBUG(dbgs() << "RegBankSelect mode overridden by command line\n");
}
}
@@ -104,6 +105,7 @@ void RegBankSelect::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineBranchProbabilityInfo>();
}
AU.addRequired<TargetPassConfig>();
+ getSelectionDAGFallbackAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -122,11 +124,11 @@ bool RegBankSelect::assignmentMatch(
// Reg is free of assignment, a simple assignment will make the
// register bank to match.
OnlyAssign = CurRegBank == nullptr;
- DEBUG(dbgs() << "Does assignment already match: ";
- if (CurRegBank) dbgs() << *CurRegBank; else dbgs() << "none";
- dbgs() << " against ";
- assert(DesiredRegBrank && "The mapping must be valid");
- dbgs() << *DesiredRegBrank << '\n';);
+ LLVM_DEBUG(dbgs() << "Does assignment already match: ";
+ if (CurRegBank) dbgs() << *CurRegBank; else dbgs() << "none";
+ dbgs() << " against ";
+ assert(DesiredRegBrank && "The mapping must be valid");
+ dbgs() << *DesiredRegBrank << '\n';);
return CurRegBank == DesiredRegBrank;
}
@@ -159,8 +161,8 @@ bool RegBankSelect::repairReg(
// same types because the type is a placeholder when this function is called.
MachineInstr *MI =
MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY).addDef(Dst).addUse(Src);
- DEBUG(dbgs() << "Copy: " << printReg(Src) << " to: " << printReg(Dst)
- << '\n');
+ LLVM_DEBUG(dbgs() << "Copy: " << printReg(Src) << " to: " << printReg(Dst)
+ << '\n');
// TODO:
// Check if MI is legal. if not, we need to legalize all the
// instructions we are going to insert.
@@ -245,7 +247,7 @@ const RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping(
MappingCost CurCost =
computeMapping(MI, *CurMapping, LocalRepairPts, &Cost);
if (CurCost < Cost) {
- DEBUG(dbgs() << "New best: " << CurCost << '\n');
+ LLVM_DEBUG(dbgs() << "New best: " << CurCost << '\n');
Cost = CurCost;
BestMapping = CurMapping;
RepairPts.clear();
@@ -397,11 +399,11 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping(
MappingCost Cost(MBFI ? MBFI->getBlockFreq(MI.getParent()) : 1);
bool Saturated = Cost.addLocalCost(InstrMapping.getCost());
assert(!Saturated && "Possible mapping saturated the cost");
- DEBUG(dbgs() << "Evaluating mapping cost for: " << MI);
- DEBUG(dbgs() << "With: " << InstrMapping << '\n');
+ LLVM_DEBUG(dbgs() << "Evaluating mapping cost for: " << MI);
+ LLVM_DEBUG(dbgs() << "With: " << InstrMapping << '\n');
RepairPts.clear();
if (BestCost && Cost > *BestCost) {
- DEBUG(dbgs() << "Mapping is too expensive from the start\n");
+ LLVM_DEBUG(dbgs() << "Mapping is too expensive from the start\n");
return Cost;
}
@@ -417,17 +419,17 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping(
unsigned Reg = MO.getReg();
if (!Reg)
continue;
- DEBUG(dbgs() << "Opd" << OpIdx << '\n');
+ LLVM_DEBUG(dbgs() << "Opd" << OpIdx << '\n');
const RegisterBankInfo::ValueMapping &ValMapping =
InstrMapping.getOperandMapping(OpIdx);
// If Reg is already properly mapped, this is free.
bool Assign;
if (assignmentMatch(Reg, ValMapping, Assign)) {
- DEBUG(dbgs() << "=> is free (match).\n");
+ LLVM_DEBUG(dbgs() << "=> is free (match).\n");
continue;
}
if (Assign) {
- DEBUG(dbgs() << "=> is free (simple assignment).\n");
+ LLVM_DEBUG(dbgs() << "=> is free (simple assignment).\n");
RepairPts.emplace_back(RepairingPlacement(MI, OpIdx, *TRI, *this,
RepairingPlacement::Reassign));
continue;
@@ -446,7 +448,7 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping(
// Check that the materialization of the repairing is possible.
if (!RepairPt.canMaterialize()) {
- DEBUG(dbgs() << "Mapping involves impossible repairing\n");
+ LLVM_DEBUG(dbgs() << "Mapping involves impossible repairing\n");
return MappingCost::ImpossibleCost();
}
@@ -473,7 +475,7 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping(
// This is an impossible to repair cost.
if (RepairCost == std::numeric_limits<unsigned>::max())
- continue;
+ return MappingCost::ImpossibleCost();
// Bias used for splitting: 5%.
const uint64_t PercentageForBias = 5;
@@ -509,7 +511,7 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping(
// Stop looking into what it takes to repair, this is already
// too expensive.
if (BestCost && Cost > *BestCost) {
- DEBUG(dbgs() << "Mapping is too expensive, stop processing\n");
+ LLVM_DEBUG(dbgs() << "Mapping is too expensive, stop processing\n");
return Cost;
}
@@ -519,7 +521,7 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping(
break;
}
}
- DEBUG(dbgs() << "Total cost is: " << Cost << "\n");
+ LLVM_DEBUG(dbgs() << "Total cost is: " << Cost << "\n");
return Cost;
}
@@ -559,14 +561,14 @@ bool RegBankSelect::applyMapping(
}
// Second, rewrite the instruction.
- DEBUG(dbgs() << "Actual mapping of the operands: " << OpdMapper << '\n');
+ LLVM_DEBUG(dbgs() << "Actual mapping of the operands: " << OpdMapper << '\n');
RBI->applyMapping(OpdMapper);
return true;
}
bool RegBankSelect::assignInstr(MachineInstr &MI) {
- DEBUG(dbgs() << "Assign: " << MI);
+ LLVM_DEBUG(dbgs() << "Assign: " << MI);
// Remember the repairing placement for all the operands.
SmallVector<RepairingPlacement, 4> RepairPts;
@@ -587,7 +589,7 @@ bool RegBankSelect::assignInstr(MachineInstr &MI) {
// Make sure the mapping is valid for MI.
assert(BestMapping->verify(MI) && "Invalid instruction mapping");
- DEBUG(dbgs() << "Best Mapping: " << *BestMapping << '\n');
+ LLVM_DEBUG(dbgs() << "Best Mapping: " << *BestMapping << '\n');
// After this call, MI may not be valid anymore.
// Do not use it.
@@ -600,7 +602,7 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
MachineFunctionProperties::Property::FailedISel))
return false;
- DEBUG(dbgs() << "Assign register banks for: " << MF.getName() << '\n');
+ LLVM_DEBUG(dbgs() << "Assign register banks for: " << MF.getName() << '\n');
const Function &F = MF.getFunction();
Mode SaveOptMode = OptMode;
if (F.hasFnAttribute(Attribute::OptimizeNone))
@@ -610,20 +612,13 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
#ifndef NDEBUG
// Check that our input is fully legal: we require the function to have the
// Legalized property, so it should be.
- // FIXME: This should be in the MachineVerifier, but it can't use the
- // LegalizerInfo as it's currently in the separate GlobalISel library.
- const MachineRegisterInfo &MRI = MF.getRegInfo();
- if (const LegalizerInfo *MLI = MF.getSubtarget().getLegalizerInfo()) {
- for (MachineBasicBlock &MBB : MF) {
- for (MachineInstr &MI : MBB) {
- if (isPreISelGenericOpcode(MI.getOpcode()) && !MLI->isLegal(MI, MRI)) {
- reportGISelFailure(MF, *TPC, *MORE, "gisel-regbankselect",
- "instruction is not legal", MI);
- return false;
- }
- }
+ // FIXME: This should be in the MachineVerifier.
+ if (!DisableGISelLegalityCheck)
+ if (const MachineInstr *MI = machineFunctionIsIllegal(MF)) {
+ reportGISelFailure(MF, *TPC, *MORE, "gisel-regbankselect",
+ "instruction is not legal", *MI);
+ return false;
}
- }
#endif
// Walk the function and assign register banks to all operands.
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp
index 4d3ae69d3a9d..16f67a217ce1 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp
@@ -12,6 +12,7 @@
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Config/llvm-config.h"
#define DEBUG_TYPE "registerbank"
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
index b3d9209ae6eb..dd15567ef1c1 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -72,7 +73,7 @@ bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const {
const RegisterBank &RegBank = getRegBank(Idx);
assert(Idx == RegBank.getID() &&
"ID does not match the index in the array");
- DEBUG(dbgs() << "Verify " << RegBank << '\n');
+ LLVM_DEBUG(dbgs() << "Verify " << RegBank << '\n');
assert(RegBank.verify(TRI) && "RegBank is invalid");
}
#endif // NDEBUG
@@ -403,18 +404,18 @@ RegisterBankInfo::getInstrAlternativeMappings(const MachineInstr &MI) const {
void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
MachineInstr &MI = OpdMapper.getMI();
MachineRegisterInfo &MRI = OpdMapper.getMRI();
- DEBUG(dbgs() << "Applying default-like mapping\n");
+ LLVM_DEBUG(dbgs() << "Applying default-like mapping\n");
for (unsigned OpIdx = 0,
EndIdx = OpdMapper.getInstrMapping().getNumOperands();
OpIdx != EndIdx; ++OpIdx) {
- DEBUG(dbgs() << "OpIdx " << OpIdx);
+ LLVM_DEBUG(dbgs() << "OpIdx " << OpIdx);
MachineOperand &MO = MI.getOperand(OpIdx);
if (!MO.isReg()) {
- DEBUG(dbgs() << " is not a register, nothing to be done\n");
+ LLVM_DEBUG(dbgs() << " is not a register, nothing to be done\n");
continue;
}
if (!MO.getReg()) {
- DEBUG(dbgs() << " is %%noreg, nothing to be done\n");
+ LLVM_DEBUG(dbgs() << " is %%noreg, nothing to be done\n");
continue;
}
assert(OpdMapper.getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns !=
@@ -426,14 +427,14 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
iterator_range<SmallVectorImpl<unsigned>::const_iterator> NewRegs =
OpdMapper.getVRegs(OpIdx);
if (NewRegs.begin() == NewRegs.end()) {
- DEBUG(dbgs() << " has not been repaired, nothing to be done\n");
+ LLVM_DEBUG(dbgs() << " has not been repaired, nothing to be done\n");
continue;
}
unsigned OrigReg = MO.getReg();
unsigned NewReg = *NewRegs.begin();
- DEBUG(dbgs() << " changed, replace " << printReg(OrigReg, nullptr));
+ LLVM_DEBUG(dbgs() << " changed, replace " << printReg(OrigReg, nullptr));
MO.setReg(NewReg);
- DEBUG(dbgs() << " with " << printReg(NewReg, nullptr));
+ LLVM_DEBUG(dbgs() << " with " << printReg(NewReg, nullptr));
// The OperandsMapper creates plain scalar, we may have to fix that.
// Check if the types match and if not, fix that.
@@ -447,35 +448,27 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
assert(OrigTy.getSizeInBits() <= NewTy.getSizeInBits() &&
"Types with difference size cannot be handled by the default "
"mapping");
- DEBUG(dbgs() << "\nChange type of new opd from " << NewTy << " to "
- << OrigTy);
+ LLVM_DEBUG(dbgs() << "\nChange type of new opd from " << NewTy << " to "
+ << OrigTy);
MRI.setType(NewReg, OrigTy);
}
- DEBUG(dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << '\n');
}
}
unsigned RegisterBankInfo::getSizeInBits(unsigned Reg,
const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const {
- const TargetRegisterClass *RC = nullptr;
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
// The size is not directly available for physical registers.
// Instead, we need to access a register class that contains Reg and
// get the size of that register class.
- RC = &getMinimalPhysRegClass(Reg, TRI);
- } else {
- LLT Ty = MRI.getType(Reg);
- unsigned RegSize = Ty.isValid() ? Ty.getSizeInBits() : 0;
- // If Reg is not a generic register, query the register class to
- // get its size.
- if (RegSize)
- return RegSize;
- // Since Reg is not a generic register, it must have a register class.
- RC = MRI.getRegClass(Reg);
+ // Because this is expensive, we'll cache the register class by calling
+ auto *RC = &getMinimalPhysRegClass(Reg, TRI);
+ assert(RC && "Expecting Register class");
+ return TRI.getRegSizeInBits(*RC);
}
- assert(RC && "Unable to deduce the register class");
- return TRI.getRegSizeInBits(*RC);
+ return TRI.getRegSizeInBits(Reg, MRI);
}
//------------------------------------------------------------------------------
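
The getSizeInBits change above delegates the virtual-register case to TargetRegisterInfo; in simplified form the caller-visible behaviour is roughly the following (sketch, not the literal implementation):

    // TRI.getRegSizeInBits(Reg, MRI) uses MRI.getType(Reg) when Reg carries a
    // valid LLT (generic vregs) and otherwise falls back to the size of the
    // register class, so RegisterBankInfo no longer distinguishes the two
    // cases itself.
    unsigned Size = TRI.getRegSizeInBits(Reg, MRI);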
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index ef990b49aceb..1a5f88743d5f 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -11,12 +11,14 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/StackProtector.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -42,20 +44,94 @@ unsigned llvm::constrainRegToClass(MachineRegisterInfo &MRI,
return Reg;
}
-
unsigned llvm::constrainOperandRegClass(
const MachineFunction &MF, const TargetRegisterInfo &TRI,
MachineRegisterInfo &MRI, const TargetInstrInfo &TII,
const RegisterBankInfo &RBI, MachineInstr &InsertPt, const MCInstrDesc &II,
- unsigned Reg, unsigned OpIdx) {
+ const MachineOperand &RegMO, unsigned OpIdx) {
+ unsigned Reg = RegMO.getReg();
// Assume physical registers are properly constrained.
assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
"PhysReg not implemented");
const TargetRegisterClass *RegClass = TII.getRegClass(II, OpIdx, &TRI, MF);
+ // Some of the target independent instructions, like COPY, may not impose any
+ // register class constraints on some of their operands: If it's a use, we can
+ // skip constraining as the instruction defining the register would constrain
+ // it.
+
+ // We can't constrain unallocatable register classes, because we can't create
+  // virtual registers for these classes, so we need to let targets handle this
+ // case.
+ if (RegClass && !RegClass->isAllocatable())
+ RegClass = TRI.getConstrainedRegClassForOperand(RegMO, MRI);
+
+ if (!RegClass) {
+ assert((!isTargetSpecificOpcode(II.getOpcode()) || RegMO.isUse()) &&
+ "Register class constraint is required unless either the "
+ "instruction is target independent or the operand is a use");
+ // FIXME: Just bailing out like this here could be not enough, unless we
+ // expect the users of this function to do the right thing for PHIs and
+ // COPY:
+ // v1 = COPY v0
+ // v2 = COPY v1
+ // v1 here may end up not being constrained at all. Please notice that to
+ // reproduce the issue we likely need a destination pattern of a selection
+ // rule producing such extra copies, not just an input GMIR with them as
+ // every existing target using selectImpl handles copies before calling it
+ // and they never reach this function.
+ return Reg;
+ }
return constrainRegToClass(MRI, TII, RBI, InsertPt, Reg, *RegClass);
}
+bool llvm::constrainSelectedInstRegOperands(MachineInstr &I,
+ const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI,
+ const RegisterBankInfo &RBI) {
+ assert(!isPreISelGenericOpcode(I.getOpcode()) &&
+ "A selected instruction is expected");
+ MachineBasicBlock &MBB = *I.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ for (unsigned OpI = 0, OpE = I.getNumExplicitOperands(); OpI != OpE; ++OpI) {
+ MachineOperand &MO = I.getOperand(OpI);
+
+ // There's nothing to be done on non-register operands.
+ if (!MO.isReg())
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Converting operand: " << MO << '\n');
+ assert(MO.isReg() && "Unsupported non-reg operand");
+
+ unsigned Reg = MO.getReg();
+ // Physical registers don't need to be constrained.
+ if (TRI.isPhysicalRegister(Reg))
+ continue;
+
+ // Register operands with a value of 0 (e.g. predicate operands) don't need
+ // to be constrained.
+ if (Reg == 0)
+ continue;
+
+ // If the operand is a vreg, we should constrain its regclass, and only
+ // insert COPYs if that's impossible.
+ // constrainOperandRegClass does that for us.
+ MO.setReg(constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, I.getDesc(),
+ MO, OpI));
+
+ // Tie uses to defs as indicated in MCInstrDesc if this hasn't already been
+ // done.
+ if (MO.isUse()) {
+ int DefIdx = I.getDesc().getOperandConstraint(OpI, MCOI::TIED_TO);
+ if (DefIdx != -1 && !I.isRegTiedToUseOperand(DefIdx))
+ I.tieOperands(DefIdx, OpI);
+ }
+ }
+ return true;
+}
+
bool llvm::isTriviallyDead(const MachineInstr &MI,
const MachineRegisterInfo &MRI) {
// If we can move an instruction, we can remove it. Otherwise, it has
@@ -101,7 +177,7 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
MI.getDebugLoc(), MI.getParent());
R << Msg;
// Printing MI is expensive; only do it if expensive remarks are enabled.
- if (MORE.allowExtraAnalysis(PassName))
+ if (TPC.isGlobalISelAbortEnabled() || MORE.allowExtraAnalysis(PassName))
R << ": " << ore::MNV("Inst", MI);
reportGISelFailure(MF, TPC, MORE, R);
}
@@ -145,3 +221,20 @@ llvm::MachineInstr *llvm::getOpcodeDef(unsigned Opcode, unsigned Reg,
}
return DefMI->getOpcode() == Opcode ? DefMI : nullptr;
}
+
+APFloat llvm::getAPFloatFromSize(double Val, unsigned Size) {
+ if (Size == 32)
+ return APFloat(float(Val));
+ if (Size == 64)
+ return APFloat(Val);
+ if (Size != 16)
+ llvm_unreachable("Unsupported FPConstant size");
+ bool Ignored;
+ APFloat APF(Val);
+ APF.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Ignored);
+ return APF;
+}
+
+void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) {
+ AU.addPreserved<StackProtector>();
+}
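The helper added above dispatches on the requested bit width and routes 16-bit constants through an explicit half-precision conversion. The following standalone sketch (not part of the patch; the function name and the tiny driver are illustrative only) reproduces the same dispatch logic in isolation, which can be handy for sanity-checking the rounding behaviour:

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace llvm;

// Same size dispatch as the new getAPFloatFromSize(): 32- and 64-bit values
// map directly onto float/double, while 16-bit values are converted to
// IEEE half with round-to-nearest-even.
static APFloat toSizedAPFloat(double Val, unsigned Size) {
  if (Size == 32)
    return APFloat(float(Val));
  if (Size == 64)
    return APFloat(Val);
  assert(Size == 16 && "only 16/32/64-bit FP constants handled here");
  bool LosesInfo;
  APFloat APF(Val);
  APF.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &LosesInfo);
  return APF;
}

int main() {
  SmallString<16> Str;
  toSizedAPFloat(1.5, 16).toString(Str);
  outs() << Str << "\n"; // 1.5 is exactly representable in half precision
  return 0;
}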
diff --git a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
index 3888226fa059..ca56f4e0c4f1 100644
--- a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -70,7 +70,6 @@
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetLoweringObjectFile.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -89,6 +88,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
@@ -159,13 +159,13 @@ namespace {
bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst, unsigned AddrSpace) const;
- /// \brief Merge everything in \p Globals for which the corresponding bit
+ /// Merge everything in \p Globals for which the corresponding bit
/// in \p GlobalSet is set.
bool doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
const BitVector &GlobalSet, Module &M, bool isConst,
unsigned AddrSpace) const;
- /// \brief Check if the given variable has been identified as must keep
+ /// Check if the given variable has been identified as must keep
/// \pre setMustKeepGlobalVariables must have been called on the Module that
/// contains GV
bool isMustKeepGlobalVariable(const GlobalVariable *GV) const {
@@ -177,7 +177,7 @@ namespace {
void setMustKeepGlobalVariables(Module &M);
/// Collect every variable marked as "used"
- void collectUsedGlobalVariables(Module &M);
+ void collectUsedGlobalVariables(Module &M, StringRef Name);
/// Keep track of the GlobalVariable that must not be merged away
SmallPtrSet<const GlobalVariable *, 16> MustKeepGlobalVariables;
@@ -242,7 +242,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
// code (currently, a Function) to the set of globals seen so far that are
// used together in that unit (GlobalUsesByFunction).
//
- // When we look at the Nth global, we now that any new set is either:
+ // When we look at the Nth global, we know that any new set is either:
// - the singleton set {N}, containing this global only, or
// - the union of {N} and a previously-discovered set, containing some
// combination of the previous N-1 globals.
@@ -440,28 +440,44 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
assert(Globals.size() > 1);
Type *Int32Ty = Type::getInt32Ty(M.getContext());
+ Type *Int8Ty = Type::getInt8Ty(M.getContext());
auto &DL = M.getDataLayout();
- DEBUG(dbgs() << " Trying to merge set, starts with #"
- << GlobalSet.find_first() << "\n");
+ LLVM_DEBUG(dbgs() << " Trying to merge set, starts with #"
+ << GlobalSet.find_first() << "\n");
+ bool Changed = false;
ssize_t i = GlobalSet.find_first();
while (i != -1) {
ssize_t j = 0;
uint64_t MergedSize = 0;
std::vector<Type*> Tys;
std::vector<Constant*> Inits;
+ std::vector<unsigned> StructIdxs;
bool HasExternal = false;
StringRef FirstExternalName;
+ unsigned MaxAlign = 1;
+ unsigned CurIdx = 0;
for (j = i; j != -1; j = GlobalSet.find_next(j)) {
Type *Ty = Globals[j]->getValueType();
+ unsigned Align = DL.getPreferredAlignment(Globals[j]);
+ unsigned Padding = alignTo(MergedSize, Align) - MergedSize;
+ MergedSize += Padding;
MergedSize += DL.getTypeAllocSize(Ty);
if (MergedSize > MaxOffset) {
break;
}
+ if (Padding) {
+ Tys.push_back(ArrayType::get(Int8Ty, Padding));
+ Inits.push_back(ConstantAggregateZero::get(Tys.back()));
+ ++CurIdx;
+ }
Tys.push_back(Ty);
Inits.push_back(Globals[j]->getInitializer());
+ StructIdxs.push_back(CurIdx++);
+
+ MaxAlign = std::max(MaxAlign, Align);
if (Globals[j]->hasExternalLinkage() && !HasExternal) {
HasExternal = true;
@@ -469,12 +485,19 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
}
}
+ // Exit early if there is only one global to merge.
+ if (Tys.size() < 2) {
+ i = j;
+ continue;
+ }
+
// If the merged variables don't have external linkage, we needn't expose
// the symbol after merging.
GlobalValue::LinkageTypes Linkage = HasExternal
? GlobalValue::ExternalLinkage
: GlobalValue::InternalLinkage;
- StructType *MergedTy = StructType::get(M.getContext(), Tys);
+ // Use a packed struct so we can control alignment.
+ StructType *MergedTy = StructType::get(M.getContext(), Tys, true);
Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
// On Darwin external linkage needs to be preserved, otherwise
@@ -492,19 +515,23 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
M, MergedTy, isConst, MergedLinkage, MergedInit, MergedName, nullptr,
GlobalVariable::NotThreadLocal, AddrSpace);
- const StructLayout *MergedLayout = DL.getStructLayout(MergedTy);
+ MergedGV->setAlignment(MaxAlign);
+ const StructLayout *MergedLayout = DL.getStructLayout(MergedTy);
for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) {
GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage();
std::string Name = Globals[k]->getName();
+ GlobalValue::DLLStorageClassTypes DLLStorage =
+ Globals[k]->getDLLStorageClass();
// Copy metadata while adjusting any debug info metadata by the original
// global's offset within the merged global.
- MergedGV->copyMetadata(Globals[k], MergedLayout->getElementOffset(idx));
+ MergedGV->copyMetadata(Globals[k],
+ MergedLayout->getElementOffset(StructIdxs[idx]));
Constant *Idx[2] = {
- ConstantInt::get(Int32Ty, 0),
- ConstantInt::get(Int32Ty, idx),
+ ConstantInt::get(Int32Ty, 0),
+ ConstantInt::get(Int32Ty, StructIdxs[idx]),
};
Constant *GEP =
ConstantExpr::getInBoundsGetElementPtr(MergedTy, MergedGV, Idx);
@@ -517,20 +544,23 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
// It's not safe on Mach-O as the alias (and thus the portion of the
// MergedGlobals variable) may be dead stripped at link time.
if (Linkage != GlobalValue::InternalLinkage || !IsMachO) {
- GlobalAlias::create(Tys[idx], AddrSpace, Linkage, Name, GEP, &M);
+ GlobalAlias *GA = GlobalAlias::create(Tys[StructIdxs[idx]], AddrSpace,
+ Linkage, Name, GEP, &M);
+ GA->setDLLStorageClass(DLLStorage);
}
NumMerged++;
}
+ Changed = true;
i = j;
}
- return true;
+ return Changed;
}
-void GlobalMerge::collectUsedGlobalVariables(Module &M) {
+void GlobalMerge::collectUsedGlobalVariables(Module &M, StringRef Name) {
// Extract global variables from llvm.used array
- const GlobalVariable *GV = M.getGlobalVariable("llvm.used");
+ const GlobalVariable *GV = M.getGlobalVariable(Name);
if (!GV || !GV->hasInitializer()) return;
// Should be an array of 'i8*'.
@@ -543,7 +573,8 @@ void GlobalMerge::collectUsedGlobalVariables(Module &M) {
}
void GlobalMerge::setMustKeepGlobalVariables(Module &M) {
- collectUsedGlobalVariables(M);
+ collectUsedGlobalVariables(M, "llvm.used");
+ collectUsedGlobalVariables(M, "llvm.compiler.used");
for (Function &F : M) {
for (BasicBlock &BB : F) {
@@ -577,8 +608,7 @@ bool GlobalMerge::doInitialization(Module &M) {
for (auto &GV : M.globals()) {
// Merge is safe for "normal" internal or external globals only
if (GV.isDeclaration() || GV.isThreadLocal() ||
- GV.hasSection() || GV.hasImplicitSection() ||
- GV.hasDLLExportStorageClass())
+ GV.hasSection() || GV.hasImplicitSection())
continue;
// It's not safe to merge globals that may be preempted
@@ -594,12 +624,6 @@ bool GlobalMerge::doInitialization(Module &M) {
unsigned AddressSpace = PT->getAddressSpace();
- // Ignore fancy-aligned globals for now.
- unsigned Alignment = DL.getPreferredAlignment(&GV);
- Type *Ty = GV.getValueType();
- if (Alignment > DL.getABITypeAlignment(Ty))
- continue;
-
// Ignore all 'special' globals.
if (GV.getName().startswith("llvm.") ||
GV.getName().startswith(".llvm."))
@@ -609,6 +633,7 @@ bool GlobalMerge::doInitialization(Module &M) {
if (isMustKeepGlobalVariable(&GV))
continue;
+ Type *Ty = GV.getValueType();
if (DL.getTypeAllocSize(Ty) < MaxOffset) {
if (TM &&
TargetLoweringObjectFile::getKindForGlobal(&GV, *TM).isBSSLocal())
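The new layout code above switches GlobalMerge to a packed struct and inserts explicit i8 padding so that each merged global still lands on its preferred alignment, while the merged variable itself takes the maximum alignment seen. A minimal sketch of that padding arithmetic, detached from the pass and with illustrative names, assuming only (size, preferred alignment) pairs as input:

#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

struct MemberLayout {
  uint64_t PadBefore; // size of the i8 array inserted before the member
  uint64_t Offset;    // resulting offset inside the packed merged struct
};

// For each (size, preferred alignment) pair, compute the padding needed in a
// packed struct and track the maximum alignment for the merged global.
static std::vector<MemberLayout>
layoutPacked(const std::vector<std::pair<uint64_t, unsigned>> &SizeAlign,
             unsigned &MaxAlign) {
  std::vector<MemberLayout> Layout;
  uint64_t MergedSize = 0;
  MaxAlign = 1;
  for (const auto &SA : SizeAlign) {
    uint64_t Padding = llvm::alignTo(MergedSize, SA.second) - MergedSize;
    MergedSize += Padding;
    Layout.push_back({Padding, MergedSize});
    MergedSize += SA.first;
    MaxAlign = std::max(MaxAlign, SA.second);
  }
  return Layout;
}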
diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp
index d8ce90e63a9d..f12d00071b24 100644
--- a/contrib/llvm/lib/CodeGen/IfConversion.cpp
+++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp
@@ -252,7 +252,7 @@ namespace {
BBInfo &TrueBBI, BBInfo &FalseBBI) const;
void AnalyzeBlock(MachineBasicBlock &MBB,
std::vector<std::unique_ptr<IfcvtToken>> &Tokens);
- bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond,
+ bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Pred,
bool isTriangle = false, bool RevBranch = false,
bool hasCommonTail = false);
void AnalyzeBlocks(MachineFunction &MF,
@@ -347,7 +347,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
BranchFolder::MBFIWrapper MBFI(getAnalysis<MachineBlockFrequencyInfo>());
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
MRI = &MF.getRegInfo();
- SchedModel.init(ST.getSchedModel(), &ST, TII);
+ SchedModel.init(&ST);
if (!TII) return false;
@@ -361,14 +361,14 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
getAnalysisIfAvailable<MachineModuleInfo>());
}
- DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'"
- << MF.getName() << "\'");
+ LLVM_DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'"
+ << MF.getName() << "\'");
if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) {
- DEBUG(dbgs() << " skipped\n");
+ LLVM_DEBUG(dbgs() << " skipped\n");
return false;
}
- DEBUG(dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "\n");
MF.RenumberBlocks();
BBAnalysis.resize(MF.getNumBlockIDs());
@@ -406,14 +406,14 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
case ICSimpleFalse: {
bool isFalse = Kind == ICSimpleFalse;
if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break;
- DEBUG(dbgs() << "Ifcvt (Simple"
- << (Kind == ICSimpleFalse ? " false" : "")
- << "): " << printMBBReference(*BBI.BB) << " ("
- << ((Kind == ICSimpleFalse) ? BBI.FalseBB->getNumber()
- : BBI.TrueBB->getNumber())
- << ") ");
+ LLVM_DEBUG(dbgs() << "Ifcvt (Simple"
+ << (Kind == ICSimpleFalse ? " false" : "")
+ << "): " << printMBBReference(*BBI.BB) << " ("
+ << ((Kind == ICSimpleFalse) ? BBI.FalseBB->getNumber()
+ : BBI.TrueBB->getNumber())
+ << ") ");
RetVal = IfConvertSimple(BBI, Kind);
- DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+ LLVM_DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
if (RetVal) {
if (isFalse) ++NumSimpleFalse;
else ++NumSimple;
@@ -430,16 +430,16 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
if (DisableTriangleR && !isFalse && isRev) break;
if (DisableTriangleF && isFalse && !isRev) break;
if (DisableTriangleFR && isFalse && isRev) break;
- DEBUG(dbgs() << "Ifcvt (Triangle");
+ LLVM_DEBUG(dbgs() << "Ifcvt (Triangle");
if (isFalse)
- DEBUG(dbgs() << " false");
+ LLVM_DEBUG(dbgs() << " false");
if (isRev)
- DEBUG(dbgs() << " rev");
- DEBUG(dbgs() << "): " << printMBBReference(*BBI.BB)
- << " (T:" << BBI.TrueBB->getNumber()
- << ",F:" << BBI.FalseBB->getNumber() << ") ");
+ LLVM_DEBUG(dbgs() << " rev");
+ LLVM_DEBUG(dbgs() << "): " << printMBBReference(*BBI.BB)
+ << " (T:" << BBI.TrueBB->getNumber()
+ << ",F:" << BBI.FalseBB->getNumber() << ") ");
RetVal = IfConvertTriangle(BBI, Kind);
- DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+ LLVM_DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
if (RetVal) {
if (isFalse) {
if (isRev) ++NumTriangleFRev;
@@ -453,24 +453,25 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
}
case ICDiamond:
if (DisableDiamond) break;
- DEBUG(dbgs() << "Ifcvt (Diamond): " << printMBBReference(*BBI.BB)
- << " (T:" << BBI.TrueBB->getNumber()
- << ",F:" << BBI.FalseBB->getNumber() << ") ");
+ LLVM_DEBUG(dbgs() << "Ifcvt (Diamond): " << printMBBReference(*BBI.BB)
+ << " (T:" << BBI.TrueBB->getNumber()
+ << ",F:" << BBI.FalseBB->getNumber() << ") ");
RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2,
Token->TClobbersPred,
Token->FClobbersPred);
- DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+ LLVM_DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
if (RetVal) ++NumDiamonds;
break;
case ICForkedDiamond:
if (DisableForkedDiamond) break;
- DEBUG(dbgs() << "Ifcvt (Forked Diamond): " << printMBBReference(*BBI.BB)
- << " (T:" << BBI.TrueBB->getNumber()
- << ",F:" << BBI.FalseBB->getNumber() << ") ");
+ LLVM_DEBUG(dbgs() << "Ifcvt (Forked Diamond): "
+ << printMBBReference(*BBI.BB)
+ << " (T:" << BBI.TrueBB->getNumber()
+ << ",F:" << BBI.FalseBB->getNumber() << ") ");
RetVal = IfConvertForkedDiamond(BBI, Kind, NumDups, NumDups2,
Token->TClobbersPred,
Token->FClobbersPred);
- DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+ LLVM_DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
if (RetVal) ++NumForkedDiamonds;
break;
}
@@ -948,7 +949,7 @@ void IfConverter::ScanInstructions(BBInfo &BBI,
BBI.ExtraCost2 = 0;
BBI.ClobbersPred = false;
for (MachineInstr &MI : make_range(Begin, End)) {
- if (MI.isDebugValue())
+ if (MI.isDebugInstr())
continue;
// It's unsafe to duplicate convergent instructions in this context, so set
@@ -1726,14 +1727,14 @@ bool IfConverter::IfConvertDiamondCommon(
for (unsigned i = 0; i < NumDups1; ++DI1) {
if (DI1 == MBB1.end())
break;
- if (!DI1->isDebugValue())
+ if (!DI1->isDebugInstr())
++i;
}
while (NumDups1 != 0) {
++DI2;
if (DI2 == MBB2.end())
break;
- if (!DI2->isDebugValue())
+ if (!DI2->isDebugInstr())
--NumDups1;
}
@@ -1767,7 +1768,7 @@ bool IfConverter::IfConvertDiamondCommon(
assert(DI1 != MBB1.begin());
--DI1;
// skip dbg_value instructions
- if (!DI1->isDebugValue())
+ if (!DI1->isDebugInstr())
++i;
}
MBB1.erase(DI1, MBB1.end());
@@ -1782,7 +1783,7 @@ bool IfConverter::IfConvertDiamondCommon(
// instructions could be found.
while (DI2 != MBB2.begin()) {
MachineBasicBlock::iterator Prev = std::prev(DI2);
- if (!Prev->isBranch() && !Prev->isDebugValue())
+ if (!Prev->isBranch() && !Prev->isDebugInstr())
break;
DI2 = Prev;
}
@@ -1793,7 +1794,7 @@ bool IfConverter::IfConvertDiamondCommon(
assert(DI2 != MBB2.begin());
--DI2;
// skip dbg_value instructions
- if (!DI2->isDebugValue())
+ if (!DI2->isDebugInstr())
--NumDups2;
}
@@ -1809,7 +1810,7 @@ bool IfConverter::IfConvertDiamondCommon(
SmallSet<unsigned, 4> ExtUses;
if (TII->isProfitableToUnpredicate(MBB1, MBB2)) {
for (const MachineInstr &FI : make_range(MBB2.begin(), DI2)) {
- if (FI.isDebugValue())
+ if (FI.isDebugInstr())
continue;
SmallVector<unsigned, 4> Defs;
for (const MachineOperand &MO : FI.operands()) {
@@ -2002,7 +2003,7 @@ void IfConverter::PredicateBlock(BBInfo &BBI,
bool AnyUnpred = false;
bool MaySpec = LaterRedefs != nullptr;
for (MachineInstr &I : make_range(BBI.BB->begin(), E)) {
- if (I.isDebugValue() || TII->isPredicated(I))
+ if (I.isDebugInstr() || TII->isPredicated(I))
continue;
// It may be possible not to predicate an instruction if it's the 'true'
// side of a diamond and the 'false' side may re-define the instruction's
@@ -2058,7 +2059,7 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
ToBBI.ExtraCost += NumCycles-1;
ToBBI.ExtraCost2 += ExtraPredCost;
- if (!TII->isPredicated(I) && !MI->isDebugValue()) {
+ if (!TII->isPredicated(I) && !MI->isDebugInstr()) {
if (!TII->PredicateInstruction(*MI, Cond)) {
#ifndef NDEBUG
dbgs() << "Unable to predicate " << I << "!\n";
diff --git a/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp
index 308b6d293d3d..0a447bc613b1 100644
--- a/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp
@@ -115,7 +115,7 @@ class ImplicitNullChecks : public MachineFunctionPass {
/// \c canHandle should return true for all instructions in \p
/// Insts.
DependenceResult computeDependence(const MachineInstr *MI,
- ArrayRef<MachineInstr *> Insts);
+ ArrayRef<MachineInstr *> Block);
/// Represents one null check that can be made implicit.
class NullCheck {
@@ -134,7 +134,7 @@ class ImplicitNullChecks : public MachineFunctionPass {
// The block branched to if the pointer is null.
MachineBasicBlock *NullSucc;
- // If this is non-null, then MemOperation has a dependency on on this
+ // If this is non-null, then MemOperation has a dependency on this
// instruction; and it needs to be hoisted to execute before MemOperation.
MachineInstr *OnlyDependency;
@@ -198,7 +198,7 @@ class ImplicitNullChecks : public MachineFunctionPass {
SuitabilityResult isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg,
ArrayRef<MachineInstr *> PrevInsts);
- /// Return true if \p FaultingMI can be hoisted from after the the
+ /// Return true if \p FaultingMI can be hoisted from after the
/// instructions in \p InstsSeenSoFar to before them. Set \p Dependence to a
/// non-null value if we also need to (and legally can) hoist a dependency.
bool canHoistInst(MachineInstr *FaultingMI, unsigned PointerReg,
@@ -496,6 +496,32 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
if (NotNullSucc->pred_size() != 1)
return false;
+ // To prevent the invalid transformation of the following code:
+ //
+ // mov %rax, %rcx
+ // test %rax, %rax
+ // %rax = ...
+ // je throw_npe
+ // mov(%rcx), %r9
+ // mov(%rax), %r10
+ //
+ // into:
+ //
+ // mov %rax, %rcx
+ // %rax = ....
+ // faulting_load_op("movl (%rax), %r10", throw_npe)
+ // mov(%rcx), %r9
+ //
+ // we must ensure that there are no instructions between the 'test' and
+ // conditional jump that modify %rax.
+ const unsigned PointerReg = MBP.LHS.getReg();
+
+ assert(MBP.ConditionDef->getParent() == &MBB && "Should be in basic block");
+
+ for (auto I = MBB.rbegin(); MBP.ConditionDef != &*I; ++I)
+ if (I->modifiesRegister(PointerReg, TRI))
+ return false;
+
// Starting with a code fragment like:
//
// test %rax, %rax
@@ -550,8 +576,6 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
// ptr could be some non-null invalid reference that never gets loaded from
// because some_cond is always true.
- const unsigned PointerReg = MBP.LHS.getReg();
-
SmallVector<MachineInstr *, 8> InstsSeenSoFar;
for (auto &MI : *NotNullSucc) {
@@ -596,9 +620,8 @@ MachineInstr *ImplicitNullChecks::insertFaultingInstr(
unsigned DefReg = NoRegister;
if (NumDefs != 0) {
- DefReg = MI->defs().begin()->getReg();
- assert(std::distance(MI->defs().begin(), MI->defs().end()) == 1 &&
- "expected exactly one def!");
+ DefReg = MI->getOperand(0).getReg();
+ assert(NumDefs == 1 && "expected exactly one def!");
}
FaultMaps::FaultKind FK;
diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
index 86ce4b7a9464..007e9283d833 100644
--- a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -46,6 +46,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
@@ -335,7 +336,7 @@ void InlineSpiller::collectRegsToSpill() {
if (isRegToSpill(SnipReg))
continue;
RegsToSpill.push_back(SnipReg);
- DEBUG(dbgs() << "\talso spill snippet " << SnipLI << '\n');
+ LLVM_DEBUG(dbgs() << "\talso spill snippet " << SnipLI << '\n');
++NumSnippets;
}
}
@@ -387,8 +388,8 @@ bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI,
LiveInterval &OrigLI = LIS.getInterval(Original);
VNInfo *OrigVNI = OrigLI.getVNInfoAt(Idx);
StackInt->MergeValueInAsValue(OrigLI, OrigVNI, StackInt->getValNumInfo(0));
- DEBUG(dbgs() << "\tmerged orig valno " << OrigVNI->id << ": "
- << *StackInt << '\n');
+ LLVM_DEBUG(dbgs() << "\tmerged orig valno " << OrigVNI->id << ": "
+ << *StackInt << '\n');
// We are going to spill SrcVNI immediately after its def, so clear out
// any later spills of the same value.
@@ -409,7 +410,7 @@ bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI,
MRI.getRegClass(SrcReg), &TRI);
--MII; // Point to store instruction.
LIS.InsertMachineInstrInMaps(*MII);
- DEBUG(dbgs() << "\thoisted: " << SrcVNI->def << '\t' << *MII);
+ LLVM_DEBUG(dbgs() << "\thoisted: " << SrcVNI->def << '\t' << *MII);
HSpiller.addToMergeableSpills(*MII, StackSlot, Original);
++NumSpills;
@@ -428,8 +429,8 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
LiveInterval *LI;
std::tie(LI, VNI) = WorkList.pop_back_val();
unsigned Reg = LI->reg;
- DEBUG(dbgs() << "Checking redundant spills for "
- << VNI->id << '@' << VNI->def << " in " << *LI << '\n');
+ LLVM_DEBUG(dbgs() << "Checking redundant spills for " << VNI->id << '@'
+ << VNI->def << " in " << *LI << '\n');
// Regs to spill are taken care of.
if (isRegToSpill(Reg))
@@ -437,7 +438,7 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
// Add all of VNI's live range to StackInt.
StackInt->MergeValueInAsValue(*LI, VNI, StackInt->getValNumInfo(0));
- DEBUG(dbgs() << "Merged to stack int: " << *StackInt << '\n');
+ LLVM_DEBUG(dbgs() << "Merged to stack int: " << *StackInt << '\n');
// Find all spills and copies of VNI.
for (MachineRegisterInfo::use_instr_nodbg_iterator
@@ -465,7 +466,7 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
// Erase spills.
int FI;
if (Reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot) {
- DEBUG(dbgs() << "Redundant spill " << Idx << '\t' << MI);
+ LLVM_DEBUG(dbgs() << "Redundant spill " << Idx << '\t' << MI);
// eliminateDeadDefs won't normally remove stores, so switch opcode.
MI.setDesc(TII.get(TargetOpcode::KILL));
DeadDefs.push_back(&MI);
@@ -527,13 +528,13 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx.getBaseIndex());
if (!ParentVNI) {
- DEBUG(dbgs() << "\tadding <undef> flags: ");
+ LLVM_DEBUG(dbgs() << "\tadding <undef> flags: ");
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg)
MO.setIsUndef();
}
- DEBUG(dbgs() << UseIdx << '\t' << MI);
+ LLVM_DEBUG(dbgs() << UseIdx << '\t' << MI);
return true;
}
@@ -547,7 +548,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
if (!Edit->canRematerializeAt(RM, OrigVNI, UseIdx, false)) {
markValueUsed(&VirtReg, ParentVNI);
- DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << MI);
+ LLVM_DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << MI);
return false;
}
@@ -555,7 +556,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
// same register for uses and defs.
if (RI.Tied) {
markValueUsed(&VirtReg, ParentVNI);
- DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << MI);
+ LLVM_DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << MI);
return false;
}
@@ -581,8 +582,8 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
NewMI->setDebugLoc(MI.getDebugLoc());
(void)DefIdx;
- DEBUG(dbgs() << "\tremat: " << DefIdx << '\t'
- << *LIS.getInstructionFromIndex(DefIdx));
+ LLVM_DEBUG(dbgs() << "\tremat: " << DefIdx << '\t'
+ << *LIS.getInstructionFromIndex(DefIdx));
// Replace operands
for (const auto &OpPair : Ops) {
@@ -592,7 +593,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
MO.setIsKill();
}
}
- DEBUG(dbgs() << "\t " << UseIdx << '\t' << MI << '\n');
+ LLVM_DEBUG(dbgs() << "\t " << UseIdx << '\t' << MI << '\n');
++NumRemats;
return true;
@@ -619,6 +620,9 @@ void InlineSpiller::reMaterializeAll() {
if (MI.isDebugValue())
continue;
+ assert(!MI.isDebugInstr() && "Did not expect to find a use in debug "
+ "instruction that isn't a DBG_VALUE");
+
anyRemat |= reMaterializeFor(LI, MI);
}
}
@@ -637,7 +641,7 @@ void InlineSpiller::reMaterializeAll() {
MI->addRegisterDead(Reg, &TRI);
if (!MI->allDefsAreDead())
continue;
- DEBUG(dbgs() << "All defs dead: " << *MI);
+ LLVM_DEBUG(dbgs() << "All defs dead: " << *MI);
DeadDefs.push_back(MI);
}
}
@@ -646,7 +650,7 @@ void InlineSpiller::reMaterializeAll() {
// deleted here.
if (DeadDefs.empty())
return;
- DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n");
+ LLVM_DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n");
Edit->eliminateDeadDefs(DeadDefs, RegsToSpill, AA);
// LiveRangeEdit::eliminateDeadDef is used to remove dead define instructions
@@ -669,7 +673,8 @@ void InlineSpiller::reMaterializeAll() {
RegsToSpill[ResultPos++] = Reg;
}
RegsToSpill.erase(RegsToSpill.begin() + ResultPos, RegsToSpill.end());
- DEBUG(dbgs() << RegsToSpill.size() << " registers to spill after remat.\n");
+ LLVM_DEBUG(dbgs() << RegsToSpill.size()
+ << " registers to spill after remat.\n");
}
//===----------------------------------------------------------------------===//
@@ -691,7 +696,7 @@ bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) {
if (!IsLoad)
HSpiller.rmFromMergeableSpills(*MI, StackSlot);
- DEBUG(dbgs() << "Coalescing stack access: " << *MI);
+ LLVM_DEBUG(dbgs() << "Coalescing stack access: " << *MI);
LIS.RemoveMachineInstrFromMaps(*MI);
MI->eraseFromParent();
@@ -848,8 +853,8 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
FoldMI->RemoveOperand(i - 1);
}
- DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MIS.end(), LIS,
- "folded"));
+ LLVM_DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MIS.end(), LIS,
+ "folded"));
if (!WasCopy)
++NumFolded;
@@ -872,8 +877,8 @@ void InlineSpiller::insertReload(unsigned NewVReg,
LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MI);
- DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MI, LIS, "reload",
- NewVReg));
+ LLVM_DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MI, LIS, "reload",
+ NewVReg));
++NumReloads;
}
@@ -912,8 +917,8 @@ void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill,
LIS.InsertMachineInstrRangeInMaps(std::next(MI), MIS.end());
- DEBUG(dumpMachineInstrRangeWithSlotIndex(std::next(MI), MIS.end(), LIS,
- "spill"));
+ LLVM_DEBUG(dumpMachineInstrRangeWithSlotIndex(std::next(MI), MIS.end(), LIS,
+ "spill"));
++NumSpills;
if (IsRealSpill)
HSpiller.addToMergeableSpills(*std::next(MI), StackSlot, Original);
@@ -921,7 +926,7 @@ void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill,
/// spillAroundUses - insert spill code around each use of Reg.
void InlineSpiller::spillAroundUses(unsigned Reg) {
- DEBUG(dbgs() << "spillAroundUses " << printReg(Reg) << '\n');
+ LLVM_DEBUG(dbgs() << "spillAroundUses " << printReg(Reg) << '\n');
LiveInterval &OldLI = LIS.getInterval(Reg);
// Iterate over instructions using Reg.
@@ -934,12 +939,15 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
if (MI->isDebugValue()) {
// Modify DBG_VALUE now that the value is in a spill slot.
MachineBasicBlock *MBB = MI->getParent();
- DEBUG(dbgs() << "Modifying debug info due to spill:\t" << *MI);
+ LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:\t" << *MI);
buildDbgValueForSpill(*MBB, MI, *MI, StackSlot);
MBB->erase(MI);
continue;
}
+ assert(!MI->isDebugInstr() && "Did not expect to find a use in debug "
+ "instruction that isn't a DBG_VALUE");
+
// Ignore copies to/from snippets. We'll delete them.
if (SnippetCopies.count(MI))
continue;
@@ -965,7 +973,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
if (SibReg && isSibling(SibReg)) {
// This may actually be a copy between snippets.
if (isRegToSpill(SibReg)) {
- DEBUG(dbgs() << "Found new snippet copy: " << *MI);
+ LLVM_DEBUG(dbgs() << "Found new snippet copy: " << *MI);
SnippetCopies.insert(MI);
continue;
}
@@ -1008,7 +1016,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
hasLiveDef = true;
}
}
- DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI << '\n');
+ LLVM_DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI << '\n');
// FIXME: Use a second vreg if instruction has no tied ops.
if (RI.Writes)
@@ -1034,7 +1042,7 @@ void InlineSpiller::spillAll() {
for (unsigned Reg : RegsToSpill)
StackInt->MergeSegmentsInAsValue(LIS.getInterval(Reg),
StackInt->getValNumInfo(0));
- DEBUG(dbgs() << "Merged spilled regs: " << *StackInt << '\n');
+ LLVM_DEBUG(dbgs() << "Merged spilled regs: " << *StackInt << '\n');
// Spill around uses of all RegsToSpill.
for (unsigned Reg : RegsToSpill)
@@ -1042,7 +1050,7 @@ void InlineSpiller::spillAll() {
// Hoisted spills may cause dead code.
if (!DeadDefs.empty()) {
- DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n");
+ LLVM_DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n");
Edit->eliminateDeadDefs(DeadDefs, RegsToSpill, AA);
}
@@ -1074,10 +1082,10 @@ void InlineSpiller::spill(LiveRangeEdit &edit) {
StackSlot = VRM.getStackSlot(Original);
StackInt = nullptr;
- DEBUG(dbgs() << "Inline spilling "
- << TRI.getRegClassName(MRI.getRegClass(edit.getReg()))
- << ':' << edit.getParent()
- << "\nFrom original " << printReg(Original) << '\n');
+ LLVM_DEBUG(dbgs() << "Inline spilling "
+ << TRI.getRegClassName(MRI.getRegClass(edit.getReg()))
+ << ':' << edit.getParent() << "\nFrom original "
+ << printReg(Original) << '\n');
assert(edit.getParent().isSpillable() &&
"Attempting to spill already spilled value.");
assert(DeadDefs.empty() && "Previous spill didn't remove dead defs");
@@ -1261,11 +1269,11 @@ void HoistSpillHelper::getVisitOrders(
"Orders have different size with WorkSet");
#ifndef NDEBUG
- DEBUG(dbgs() << "Orders size is " << Orders.size() << "\n");
+ LLVM_DEBUG(dbgs() << "Orders size is " << Orders.size() << "\n");
SmallVector<MachineDomTreeNode *, 32>::reverse_iterator RIt = Orders.rbegin();
for (; RIt != Orders.rend(); RIt++)
- DEBUG(dbgs() << "BB" << (*RIt)->getBlock()->getNumber() << ",");
- DEBUG(dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "BB" << (*RIt)->getBlock()->getNumber() << ",");
+ LLVM_DEBUG(dbgs() << "\n");
#endif
}
@@ -1374,7 +1382,7 @@ void HoistSpillHelper::runHoistSpills(
// Current Block is the BB containing the new hoisted spill. Add it to
// SpillsToKeep. LiveReg is the source of the new spill.
SpillsToKeep[*RIt] = LiveReg;
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "spills in BB: ";
for (const auto Rspill : SpillsInSubTree)
dbgs() << Rspill->getBlock()->getNumber() << " ";
@@ -1430,7 +1438,7 @@ void HoistSpillHelper::hoistAllSpills() {
if (Ent.second.empty())
continue;
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "\nFor Slot" << Slot << " and VN" << OrigVNI->id << ":\n"
<< "Equal spills in BB: ";
for (const auto spill : EqValSpills)
@@ -1445,7 +1453,7 @@ void HoistSpillHelper::hoistAllSpills() {
runHoistSpills(OrigLI, *OrigVNI, EqValSpills, SpillsToRm, SpillsToIns);
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "Finally inserted spills in BB: ";
for (const auto Ispill : SpillsToIns)
dbgs() << Ispill.first->getNumber() << " ";
diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp
index 72227cc7bba9..82f6e8d8e234 100644
--- a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp
+++ b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp
@@ -48,8 +48,8 @@ void InterferenceCache::reinitPhysRegEntries() {
if (PhysRegEntriesCount == TRI->getNumRegs()) return;
free(PhysRegEntries);
PhysRegEntriesCount = TRI->getNumRegs();
- PhysRegEntries = (unsigned char*)
- calloc(PhysRegEntriesCount, sizeof(unsigned char));
+ PhysRegEntries = static_cast<unsigned char*>(
+ safe_calloc(PhysRegEntriesCount, sizeof(unsigned char)));
}
void InterferenceCache::init(MachineFunction *mf,
diff --git a/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 9c906d309639..fd2ff162630a 100644
--- a/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -104,15 +104,15 @@ private:
/// The maximum supported interleave factor.
unsigned MaxFactor;
- /// \brief Transform an interleaved load into target specific intrinsics.
+ /// Transform an interleaved load into target specific intrinsics.
bool lowerInterleavedLoad(LoadInst *LI,
SmallVector<Instruction *, 32> &DeadInsts);
- /// \brief Transform an interleaved store into target specific intrinsics.
+ /// Transform an interleaved store into target specific intrinsics.
bool lowerInterleavedStore(StoreInst *SI,
SmallVector<Instruction *, 32> &DeadInsts);
- /// \brief Returns true if the uses of an interleaved load by the
+ /// Returns true if the uses of an interleaved load by the
/// extractelement instructions in \p Extracts can be replaced by uses of the
/// shufflevector instructions in \p Shuffles instead. If so, the necessary
/// replacements are also performed.
@@ -136,7 +136,7 @@ FunctionPass *llvm::createInterleavedAccessPass() {
return new InterleavedAccess();
}
-/// \brief Check if the mask is a DE-interleave mask of the given factor
+/// Check if the mask is a DE-interleave mask of the given factor
/// \p Factor like:
/// <Index, Index+Factor, ..., Index+(NumElts-1)*Factor>
static bool isDeInterleaveMaskOfFactor(ArrayRef<int> Mask, unsigned Factor,
@@ -158,7 +158,7 @@ static bool isDeInterleaveMaskOfFactor(ArrayRef<int> Mask, unsigned Factor,
return false;
}
-/// \brief Check if the mask is a DE-interleave mask for an interleaved load.
+/// Check if the mask is a DE-interleave mask for an interleaved load.
///
/// E.g. DE-interleave masks (Factor = 2) could be:
/// <0, 2, 4, 6> (mask of index 0 to extract even elements)
@@ -176,7 +176,7 @@ static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
return false;
}
-/// \brief Check if the mask can be used in an interleaved store.
+/// Check if the mask can be used in an interleaved store.
//
/// It checks for a more general pattern than the RE-interleave mask.
/// I.e. <x, y, ... z, x+1, y+1, ...z+1, x+2, y+2, ...z+2, ...>
@@ -332,7 +332,7 @@ bool InterleavedAccess::lowerInterleavedLoad(
if (!tryReplaceExtracts(Extracts, Shuffles))
return false;
- DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n");
+ LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n");
// Try to create target specific intrinsics to replace the load and shuffles.
if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor))
@@ -424,7 +424,7 @@ bool InterleavedAccess::lowerInterleavedStore(
if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor, OpNumElts))
return false;
- DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n");
+ LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n");
// Try to create target specific intrinsics to replace the store and shuffle.
if (!TLI->lowerInterleavedStore(SI, SVI, Factor))
@@ -441,7 +441,7 @@ bool InterleavedAccess::runOnFunction(Function &F) {
if (!TPC || !LowerInterleavedAccesses)
return false;
- DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n");
+ LLVM_DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n");
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &TM = TPC->getTM<TargetMachine>();
diff --git a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
index 12777d5ed110..eb4099964242 100644
--- a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -456,6 +456,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
}
case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_label:
break; // Simply strip out debugging intrinsics
case Intrinsic::eh_typeid_for:
diff --git a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
index 4c6e21ab315a..2cd389ce2c11 100644
--- a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -16,7 +16,6 @@
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetLoweringObjectFile.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/MC/MCAsmBackend.h"
@@ -24,16 +23,22 @@
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
+static cl::opt<bool> EnableTrapUnreachable("trap-unreachable",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Enable generating trap for unreachable"));
+
void LLVMTargetMachine::initAsmInfo() {
MRI = TheTarget.createMCRegInfo(getTargetTriple().str());
MII = TheTarget.createMCInstrInfo();
@@ -79,6 +84,9 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T,
this->RM = RM;
this->CMModel = CM;
this->OptLevel = OL;
+
+ if (EnableTrapUnreachable)
+ this->Options.TrapUnreachable = true;
}
TargetTransformInfo
@@ -113,8 +121,10 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
}
bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
- raw_pwrite_stream &Out, CodeGenFileType FileType,
- MCContext &Context) {
+ raw_pwrite_stream &Out,
+ raw_pwrite_stream *DwoOut,
+ CodeGenFileType FileType,
+ MCContext &Context) {
if (Options.MCOptions.MCSaveTempLabels)
Context.setAllowTemporaryLabels(false);
@@ -131,17 +141,17 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
getTargetTriple(), MAI.getAssemblerDialect(), MAI, MII, MRI);
// Create a code emitter if asked to show the encoding.
- MCCodeEmitter *MCE = nullptr;
+ std::unique_ptr<MCCodeEmitter> MCE;
if (Options.MCOptions.ShowMCEncoding)
- MCE = getTarget().createMCCodeEmitter(MII, MRI, Context);
+ MCE.reset(getTarget().createMCCodeEmitter(MII, MRI, Context));
- MCAsmBackend *MAB =
- getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions);
+ std::unique_ptr<MCAsmBackend> MAB(
+ getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions));
auto FOut = llvm::make_unique<formatted_raw_ostream>(Out);
MCStreamer *S = getTarget().createAsmStreamer(
Context, std::move(FOut), Options.MCOptions.AsmVerbose,
- Options.MCOptions.MCUseDwarfDirectory, InstPrinter, MCE, MAB,
- Options.MCOptions.ShowMCInst);
+ Options.MCOptions.MCUseDwarfDirectory, InstPrinter, std::move(MCE),
+ std::move(MAB), Options.MCOptions.ShowMCInst);
AsmStreamer.reset(S);
break;
}
@@ -159,7 +169,9 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
Triple T(getTargetTriple().str());
AsmStreamer.reset(getTarget().createMCObjectStreamer(
- T, Context, std::unique_ptr<MCAsmBackend>(MAB), Out,
+ T, Context, std::unique_ptr<MCAsmBackend>(MAB),
+ DwoOut ? MAB->createDwoObjectWriter(Out, *DwoOut)
+ : MAB->createObjectWriter(Out),
std::unique_ptr<MCCodeEmitter>(MCE), STI, Options.MCOptions.MCRelaxAll,
Options.MCOptions.MCIncrementalLinkerCompatible,
/*DWARFMustBeAtTheEnd*/ true));
@@ -184,6 +196,7 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
raw_pwrite_stream &Out,
+ raw_pwrite_stream *DwoOut,
CodeGenFileType FileType,
bool DisableVerify,
MachineModuleInfo *MMI) {
@@ -194,7 +207,8 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
if (!Context)
return true;
- if (WillCompleteCodeGenPipeline && addAsmPrinter(PM, Out, FileType, *Context))
+ if (WillCompleteCodeGenPipeline &&
+ addAsmPrinter(PM, Out, DwoOut, FileType, *Context))
return true;
PM.add(createFreeMachineFunctionPass());
@@ -234,7 +248,7 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
const Triple &T = getTargetTriple();
std::unique_ptr<MCStreamer> AsmStreamer(getTarget().createMCObjectStreamer(
- T, *Ctx, std::unique_ptr<MCAsmBackend>(MAB), Out,
+ T, *Ctx, std::unique_ptr<MCAsmBackend>(MAB), MAB->createObjectWriter(Out),
std::unique_ptr<MCCodeEmitter>(MCE), STI, Options.MCOptions.MCRelaxAll,
Options.MCOptions.MCIncrementalLinkerCompatible,
/*DWARFMustBeAtTheEnd*/ true));
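With the extra DwoOut parameter threaded through addPassesToEmitFile and addAsmPrinter, a driver can now ask the object streamer for a DWO object writer and send the .dwo sections to a second stream. A rough usage sketch, assuming a hypothetical out-of-tree driver and the defaulted trailing parameters of addPassesToEmitFile, might look like:

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Target/TargetMachine.h"

// Emit an object file plus a separate .dwo file (split DWARF). Passing
// nullptr for DwoOut instead keeps the pre-patch single-output behaviour.
static bool emitWithSplitDwarf(llvm::LLVMTargetMachine &TM, llvm::Module &M,
                               llvm::StringRef ObjPath,
                               llvm::StringRef DwoPath) {
  std::error_code EC;
  llvm::ToolOutputFile Obj(ObjPath, EC, llvm::sys::fs::F_None);
  if (EC)
    return false;
  llvm::ToolOutputFile Dwo(DwoPath, EC, llvm::sys::fs::F_None);
  if (EC)
    return false;

  llvm::legacy::PassManager PM;
  if (TM.addPassesToEmitFile(PM, Obj.os(), &Dwo.os(),
                             llvm::TargetMachine::CGFT_ObjectFile))
    return false; // target does not support object emission
  PM.run(M);
  Obj.keep();
  Dwo.keep();
  return true;
}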
diff --git a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
index 8ffd51a550fc..5dbce841cfd5 100644
--- a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -139,3 +140,14 @@ void LatencyPriorityQueue::remove(SUnit *SU) {
std::swap(*I, Queue.back());
Queue.pop_back();
}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {
+ dbgs() << "Latency Priority Queue\n";
+ dbgs() << " Number of Queue Entries: " << Queue.size() << "\n";
+ for (auto const &SU : Queue) {
+ dbgs() << " ";
+ SU->dump(DAG);
+ }
+}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp b/contrib/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
index 996d40ca6e1e..5b52cc66a297 100644
--- a/contrib/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
@@ -57,23 +57,23 @@ MachineBlockFrequencyInfo &
LazyMachineBlockFrequencyInfoPass::calculateIfNotAvailable() const {
auto *MBFI = getAnalysisIfAvailable<MachineBlockFrequencyInfo>();
if (MBFI) {
- DEBUG(dbgs() << "MachineBlockFrequencyInfo is available\n");
+ LLVM_DEBUG(dbgs() << "MachineBlockFrequencyInfo is available\n");
return *MBFI;
}
auto &MBPI = getAnalysis<MachineBranchProbabilityInfo>();
auto *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
auto *MDT = getAnalysisIfAvailable<MachineDominatorTree>();
- DEBUG(dbgs() << "Building MachineBlockFrequencyInfo on the fly\n");
- DEBUG(if (MLI) dbgs() << "LoopInfo is available\n");
+ LLVM_DEBUG(dbgs() << "Building MachineBlockFrequencyInfo on the fly\n");
+ LLVM_DEBUG(if (MLI) dbgs() << "LoopInfo is available\n");
if (!MLI) {
- DEBUG(dbgs() << "Building LoopInfo on the fly\n");
+ LLVM_DEBUG(dbgs() << "Building LoopInfo on the fly\n");
// First create a dominator tree.
- DEBUG(if (MDT) dbgs() << "DominatorTree is available\n");
+ LLVM_DEBUG(if (MDT) dbgs() << "DominatorTree is available\n");
if (!MDT) {
- DEBUG(dbgs() << "Building DominatorTree on the fly\n");
+ LLVM_DEBUG(dbgs() << "Building DominatorTree on the fly\n");
OwnedMDT = make_unique<MachineDominatorTree>();
OwnedMDT->getBase().recalculate(*MF);
MDT = OwnedMDT.get();
diff --git a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp
index 8c54751ee833..d06821bdfcce 100644
--- a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp
+++ b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Casting.h"
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp
index 19ec281079cb..fea83e92de8f 100644
--- a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp
@@ -40,6 +40,8 @@
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
@@ -64,7 +66,7 @@ using namespace llvm;
STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted");
-// \brief If @MI is a DBG_VALUE with debug value described by a defined
+// If @MI is a DBG_VALUE with debug value described by a defined
// register, returns the number of this register. In the other case, returns 0.
static unsigned isDbgValueDescribedByReg(const MachineInstr &MI) {
assert(MI.isDebugValue() && "expected a DBG_VALUE");
@@ -81,6 +83,7 @@ private:
const TargetRegisterInfo *TRI;
const TargetInstrInfo *TII;
const TargetFrameLowering *TFI;
+ BitVector CalleeSavedRegs;
LexicalScopes LS;
/// Keeps track of lexical scopes associated with a user value's source
@@ -178,11 +181,11 @@ private:
using VarLocMap = UniqueVector<VarLoc>;
using VarLocSet = SparseBitVector<>;
using VarLocInMBB = SmallDenseMap<const MachineBasicBlock *, VarLocSet>;
- struct SpillDebugPair {
- MachineInstr *SpillInst;
+ struct TransferDebugPair {
+ MachineInstr *TransferInst;
MachineInstr *DebugInst;
};
- using SpillMap = SmallVector<SpillDebugPair, 4>;
+ using TransferMap = SmallVector<TransferDebugPair, 4>;
/// This holds the working set of currently open ranges. For fast
/// access, this is done both as a set of VarLocIDs, and a map of
@@ -235,18 +238,23 @@ private:
bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF,
unsigned &Reg);
int extractSpillBaseRegAndOffset(const MachineInstr &MI, unsigned &Reg);
+ void insertTransferDebugPair(MachineInstr &MI, OpenRangesSet &OpenRanges,
+ TransferMap &Transfers, VarLocMap &VarLocIDs,
+ unsigned OldVarID, unsigned NewReg = 0);
void transferDebugValue(const MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs);
void transferSpillInst(MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocMap &VarLocIDs, SpillMap &Spills);
+ VarLocMap &VarLocIDs, TransferMap &Transfers);
+ void transferRegisterCopy(MachineInstr &MI, OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs, TransferMap &Transfers);
void transferRegisterDef(MachineInstr &MI, OpenRangesSet &OpenRanges,
const VarLocMap &VarLocIDs);
bool transferTerminatorInst(MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs);
- bool transfer(MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocInMBB &OutLocs, VarLocMap &VarLocIDs, SpillMap &Spills,
- bool transferSpills);
+ bool process(MachineInstr &MI, OpenRangesSet &OpenRanges,
+ VarLocInMBB &OutLocs, VarLocMap &VarLocIDs,
+ TransferMap &Transfers, bool transferChanges);
bool join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs,
const VarLocMap &VarLocIDs,
@@ -369,6 +377,54 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI,
}
}
+/// Create a new TransferDebugPair and insert it in \p Transfers. The VarLoc
+/// with \p OldVarID should be deleted from \p OpenRanges and replaced with the
+/// new VarLoc. If \p NewReg is different from the default zero value, the new
+/// location will be a register location created by the copy-like instruction;
+/// otherwise it is the variable's location on the stack.
+void LiveDebugValues::insertTransferDebugPair(
+ MachineInstr &MI, OpenRangesSet &OpenRanges, TransferMap &Transfers,
+ VarLocMap &VarLocIDs, unsigned OldVarID, unsigned NewReg) {
+ const MachineInstr *DMI = &VarLocIDs[OldVarID].MI;
+ MachineFunction *MF = MI.getParent()->getParent();
+ MachineInstr *NewDMI;
+ if (NewReg) {
+ // Create a DBG_VALUE instruction to describe the Var in its new
+ // register location.
+ NewDMI = BuildMI(*MF, DMI->getDebugLoc(), DMI->getDesc(),
+ DMI->isIndirectDebugValue(), NewReg,
+ DMI->getDebugVariable(), DMI->getDebugExpression());
+ if (DMI->isIndirectDebugValue())
+ NewDMI->getOperand(1).setImm(DMI->getOperand(1).getImm());
+ LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for register copy: ";
+ NewDMI->print(dbgs(), false, false, false, TII));
+ } else {
+ // Create a DBG_VALUE instruction to describe the Var in its spilled
+ // location.
+ unsigned SpillBase;
+ int SpillOffset = extractSpillBaseRegAndOffset(MI, SpillBase);
+ auto *SpillExpr = DIExpression::prepend(DMI->getDebugExpression(),
+ DIExpression::NoDeref, SpillOffset);
+ NewDMI = BuildMI(*MF, DMI->getDebugLoc(), DMI->getDesc(), true, SpillBase,
+ DMI->getDebugVariable(), SpillExpr);
+ LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for spill: ";
+ NewDMI->print(dbgs(), false, false, false, TII));
+ }
+
+ // The newly created DBG_VALUE instruction NewDMI must be inserted after
+ // MI. Keep track of the pairing.
+ TransferDebugPair MIP = {&MI, NewDMI};
+ Transfers.push_back(MIP);
+
+ // End all previous ranges of Var.
+ OpenRanges.erase(VarLocIDs[OldVarID].Var);
+
+ // Add the VarLoc to OpenRanges.
+ VarLoc VL(*NewDMI, LS);
+ unsigned LocID = VarLocIDs.insert(VL);
+ OpenRanges.insert(LocID, VL.Var);
+}
+
/// A definition of a register may mark the end of a range.
void LiveDebugValues::transferRegisterDef(MachineInstr &MI,
OpenRangesSet &OpenRanges,
@@ -426,28 +482,51 @@ bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI,
FrameInfo.isSpillSlotObjectIndex(FI)))
return false;
- // In a spill instruction generated by the InlineSpiller the spilled register
- // has its kill flag set. Return false if we don't find such a register.
- Reg = 0;
+ auto isKilledReg = [&](const MachineOperand MO, unsigned &Reg) {
+ if (!MO.isReg() || !MO.isUse()) {
+ Reg = 0;
+ return false;
+ }
+ Reg = MO.getReg();
+ return MO.isKill();
+ };
+
for (const MachineOperand &MO : MI.operands()) {
- if (MO.isReg() && MO.isUse() && MO.isKill()) {
- Reg = MO.getReg();
- break;
+ // In a spill instruction generated by the InlineSpiller the spilled
+ // register has its kill flag set.
+ if (isKilledReg(MO, Reg))
+ return true;
+ if (Reg != 0) {
+ // Check whether the next instruction kills the spilled register.
+ // FIXME: Current solution does not cover search for killed register in
+ // bundles and instructions further down the chain.
+ auto NextI = std::next(MI.getIterator());
+ // Skip if the next instruction is the basic block's end iterator.
+ if (MI.getParent()->end() == NextI)
+ continue;
+ unsigned RegNext;
+ for (const MachineOperand &MONext : NextI->operands()) {
+ // Return true if we came across the register from the
+ // previous spill instruction that is killed in NextI.
+ if (isKilledReg(MONext, RegNext) && RegNext == Reg)
+ return true;
+ }
}
}
- return Reg != 0;
+ // Return false if we didn't find a spilled register.
+ return false;
}
/// A spilled register may indicate that we have to end the current range of
/// a variable and create a new one for the spill location.
-/// We don't want to insert any instructions in transfer(), so we just create
-/// the DBG_VALUE witout inserting it and keep track of it in @Spills.
+/// We don't want to insert any instructions in process(), so we just create
+/// the DBG_VALUE without inserting it and keep track of it in \p Transfers.
/// It will be inserted into the BB when we're done iterating over the
/// instructions.
void LiveDebugValues::transferSpillInst(MachineInstr &MI,
OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs,
- SpillMap &Spills) {
+ TransferMap &Transfers) {
unsigned Reg;
MachineFunction *MF = MI.getMF();
if (!isSpillInstruction(MI, MF, Reg))
@@ -456,35 +535,49 @@ void LiveDebugValues::transferSpillInst(MachineInstr &MI,
// Check if the register is the location of a debug value.
for (unsigned ID : OpenRanges.getVarLocs()) {
if (VarLocIDs[ID].isDescribedByReg() == Reg) {
- DEBUG(dbgs() << "Spilling Register " << printReg(Reg, TRI) << '('
- << VarLocIDs[ID].Var.getVar()->getName() << ")\n");
-
- // Create a DBG_VALUE instruction to describe the Var in its spilled
- // location, but don't insert it yet to avoid invalidating the
- // iterator in our caller.
- unsigned SpillBase;
- int SpillOffset = extractSpillBaseRegAndOffset(MI, SpillBase);
- const MachineInstr *DMI = &VarLocIDs[ID].MI;
- auto *SpillExpr = DIExpression::prepend(
- DMI->getDebugExpression(), DIExpression::NoDeref, SpillOffset);
- MachineInstr *SpDMI =
- BuildMI(*MF, DMI->getDebugLoc(), DMI->getDesc(), true, SpillBase,
- DMI->getDebugVariable(), SpillExpr);
- DEBUG(dbgs() << "Creating DBG_VALUE inst for spill: ";
- SpDMI->print(dbgs(), false, TII));
-
- // The newly created DBG_VALUE instruction SpDMI must be inserted after
- // MI. Keep track of the pairing.
- SpillDebugPair MIP = {&MI, SpDMI};
- Spills.push_back(MIP);
-
- // End all previous ranges of Var.
- OpenRanges.erase(VarLocIDs[ID].Var);
-
- // Add the VarLoc to OpenRanges.
- VarLoc VL(*SpDMI, LS);
- unsigned SpillLocID = VarLocIDs.insert(VL);
- OpenRanges.insert(SpillLocID, VL.Var);
+ LLVM_DEBUG(dbgs() << "Spilling Register " << printReg(Reg, TRI) << '('
+ << VarLocIDs[ID].Var.getVar()->getName() << ")\n");
+ insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, ID);
+ return;
+ }
+ }
+}
+
+/// If \p MI is a register copy instruction that copies a previously tracked
+/// value from one register to another register that is callee saved, we
+/// create a new DBG_VALUE instruction describing the copy destination register.
+void LiveDebugValues::transferRegisterCopy(MachineInstr &MI,
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs,
+ TransferMap &Transfers) {
+ const MachineOperand *SrcRegOp, *DestRegOp;
+
+ if (!TII->isCopyInstr(MI, SrcRegOp, DestRegOp) || !SrcRegOp->isKill() ||
+ !DestRegOp->isDef())
+ return;
+
+ auto isCalleSavedReg = [&](unsigned Reg) {
+ for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
+ if (CalleeSavedRegs.test(*RAI))
+ return true;
+ return false;
+ };
+
+ unsigned SrcReg = SrcRegOp->getReg();
+ unsigned DestReg = DestRegOp->getReg();
+
+ // We want to recognize instructions where the destination register is a
+ // callee-saved register. If a register that could be clobbered by the call
+ // were included, there would be a good chance that it is going to be
+ // clobbered soon. It is more likely that the previous register location,
+ // which is callee saved, stays unclobbered longer, even if it is killed.
+ if (!isCalleSavedReg(DestReg))
+ return;
+
+ for (unsigned ID : OpenRanges.getVarLocs()) {
+ if (VarLocIDs[ID].isDescribedByReg() == SrcReg) {
+ insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, ID,
+ DestReg);
return;
}
}
@@ -497,16 +590,18 @@ bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI,
const VarLocMap &VarLocIDs) {
bool Changed = false;
const MachineBasicBlock *CurMBB = MI.getParent();
- if (!(MI.isTerminator() || (&MI == &CurMBB->instr_back())))
+ if (!(MI.isTerminator() || (&MI == &CurMBB->back())))
return false;
if (OpenRanges.empty())
return false;
- DEBUG(for (unsigned ID : OpenRanges.getVarLocs()) {
- // Copy OpenRanges to OutLocs, if not already present.
- dbgs() << "Add to OutLocs: "; VarLocIDs[ID].dump();
- });
+ LLVM_DEBUG(for (unsigned ID
+ : OpenRanges.getVarLocs()) {
+ // Copy OpenRanges to OutLocs, if not already present.
+ dbgs() << "Add to OutLocs: ";
+ VarLocIDs[ID].dump();
+ });
VarLocSet &VLS = OutLocs[CurMBB];
Changed = VLS |= OpenRanges.getVarLocs();
OpenRanges.clear();
@@ -514,14 +609,16 @@ bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI,
}
/// This routine creates OpenRanges and OutLocs.
-bool LiveDebugValues::transfer(MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocInMBB &OutLocs, VarLocMap &VarLocIDs,
- SpillMap &Spills, bool transferSpills) {
+bool LiveDebugValues::process(MachineInstr &MI, OpenRangesSet &OpenRanges,
+ VarLocInMBB &OutLocs, VarLocMap &VarLocIDs,
+ TransferMap &Transfers, bool transferChanges) {
bool Changed = false;
transferDebugValue(MI, OpenRanges, VarLocIDs);
transferRegisterDef(MI, OpenRanges, VarLocIDs);
- if (transferSpills)
- transferSpillInst(MI, OpenRanges, VarLocIDs, Spills);
+ if (transferChanges) {
+ transferRegisterCopy(MI, OpenRanges, VarLocIDs, Transfers);
+ transferSpillInst(MI, OpenRanges, VarLocIDs, Transfers);
+ }
Changed = transferTerminatorInst(MI, OpenRanges, OutLocs, VarLocIDs);
return Changed;
}
@@ -532,7 +629,7 @@ bool LiveDebugValues::transfer(MachineInstr &MI, OpenRangesSet &OpenRanges,
bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
VarLocInMBB &InLocs, const VarLocMap &VarLocIDs,
SmallPtrSet<const MachineBasicBlock *, 16> &Visited) {
- DEBUG(dbgs() << "join MBB: " << MBB.getName() << "\n");
+ LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getName() << "\n");
bool Changed = false;
VarLocSet InLocsT; // Temporary incoming locations.
@@ -583,7 +680,7 @@ bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
for (auto ID : Diff) {
// This VarLoc is not found in InLocs i.e. it is not yet inserted. So, a
// new range is started for the var from the mbb's beginning by inserting
- // a new DBG_VALUE. transfer() will end this range however appropriate.
+ // a new DBG_VALUE. process() will end this range however appropriate.
const VarLoc &DiffIt = VarLocIDs[ID];
const MachineInstr *DMI = &DiffIt.MI;
MachineInstr *MI =
@@ -592,7 +689,7 @@ bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
DMI->getDebugVariable(), DMI->getDebugExpression());
if (DMI->isIndirectDebugValue())
MI->getOperand(1).setImm(DMI->getOperand(1).getImm());
- DEBUG(dbgs() << "Inserted: "; MI->dump(););
+ LLVM_DEBUG(dbgs() << "Inserted: "; MI->dump(););
ILS.set(ID);
++NumInserted;
Changed = true;
@@ -603,7 +700,7 @@ bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
/// Calculate the liveness information for the given machine function and
/// extend ranges across basic blocks.
bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
- DEBUG(dbgs() << "\nDebug Range Extension\n");
+ LLVM_DEBUG(dbgs() << "\nDebug Range Extension\n");
bool Changed = false;
bool OLChanged = false;
@@ -613,7 +710,7 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
OpenRangesSet OpenRanges; // Ranges that are open until end of bb.
VarLocInMBB OutLocs; // Ranges that exist beyond bb.
VarLocInMBB InLocs; // Ranges that are incoming after joining.
- SpillMap Spills; // DBG_VALUEs associated with spills.
+ TransferMap Transfers; // DBG_VALUEs associated with spills.
DenseMap<unsigned int, MachineBasicBlock *> OrderToBB;
DenseMap<MachineBasicBlock *, unsigned int> BBToOrder;
@@ -624,6 +721,8 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
std::greater<unsigned int>>
Pending;
+ enum : bool { dontTransferChanges = false, transferChanges = true };
+
// Initialize every mbb with OutLocs.
// We are not looking at any spill instructions during the initial pass
// over the BBs. The LiveDebugVariables pass has already created DBG_VALUE
@@ -631,11 +730,11 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
// within the BB in which the spill occurs.
for (auto &MBB : MF)
for (auto &MI : MBB)
- transfer(MI, OpenRanges, OutLocs, VarLocIDs, Spills,
- /*transferSpills=*/false);
+ process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers,
+ dontTransferChanges);
- DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, "OutLocs after initialization",
- dbgs()));
+ LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs,
+ "OutLocs after initialization", dbgs()));
ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
unsigned int RPONumber = 0;
@@ -646,7 +745,7 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
++RPONumber;
}
// This is a standard "union of predecessor outs" dataflow problem.
- // To solve it, we perform join() and transfer() using the two worklist method
+ // To solve it, we perform join() and process() using the two worklist method
// until the ranges converge.
// Ranges have converged when both worklists are empty.
SmallPtrSet<const MachineBasicBlock *, 16> Visited;
@@ -655,7 +754,7 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
// thing twice. We could avoid this with a custom priority queue, but this
// is probably not worth it.
SmallPtrSet<MachineBasicBlock *, 16> OnPending;
- DEBUG(dbgs() << "Processing Worklist\n");
+ LLVM_DEBUG(dbgs() << "Processing Worklist\n");
while (!Worklist.empty()) {
MachineBasicBlock *MBB = OrderToBB[Worklist.top()];
Worklist.pop();
@@ -668,19 +767,19 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
// examine spill instructions to see whether they spill registers that
// correspond to user variables.
for (auto &MI : *MBB)
- OLChanged |= transfer(MI, OpenRanges, OutLocs, VarLocIDs, Spills,
- /*transferSpills=*/true);
+ OLChanged |= process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers,
+ transferChanges);
// Add any DBG_VALUE instructions necessitated by spills.
- for (auto &SP : Spills)
- MBB->insertAfter(MachineBasicBlock::iterator(*SP.SpillInst),
- SP.DebugInst);
- Spills.clear();
+ for (auto &TR : Transfers)
+ MBB->insertAfter(MachineBasicBlock::iterator(*TR.TransferInst),
+ TR.DebugInst);
+ Transfers.clear();
- DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs,
- "OutLocs after propagating", dbgs()));
- DEBUG(printVarLocInMBB(MF, InLocs, VarLocIDs,
- "InLocs after propagating", dbgs()));
+ LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs,
+ "OutLocs after propagating", dbgs()));
+ LLVM_DEBUG(printVarLocInMBB(MF, InLocs, VarLocIDs,
+ "InLocs after propagating", dbgs()));
if (OLChanged) {
OLChanged = false;
@@ -697,8 +796,8 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
assert(Pending.empty() && "Pending should be empty");
}
- DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, "Final OutLocs", dbgs()));
- DEBUG(printVarLocInMBB(MF, InLocs, VarLocIDs, "Final InLocs", dbgs()));
+ LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, "Final OutLocs", dbgs()));
+ LLVM_DEBUG(printVarLocInMBB(MF, InLocs, VarLocIDs, "Final InLocs", dbgs()));
return Changed;
}
@@ -715,6 +814,8 @@ bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) {
TRI = MF.getSubtarget().getRegisterInfo();
TII = MF.getSubtarget().getInstrInfo();
TFI = MF.getSubtarget().getFrameLowering();
+ TFI->determineCalleeSaves(MF, CalleeSavedRegs,
+ make_unique<RegScavenger>().get());
LS.initialize(MF);
bool Changed = ExtendRanges(MF);
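For readers unfamiliar with the scheme, the "union of predecessor outs" two-worklist iteration described in the ExtendRanges comments above can be sketched on a toy CFG. The standalone C++ program below is an illustration only, not the LiveDebugValues pass itself: block numbers stand in for reverse post-order numbers, a std::set<int> of location IDs stands in for the VarLocSet bit vector, and join() is modelled as a plain union of predecessor out-sets (the real pass also keeps an OnPending set to avoid duplicate queue entries).

#include <cstdio>
#include <functional>
#include <queue>
#include <set>
#include <vector>

int main() {
  // Toy CFG: 0 -> 1, 1 -> 2, 2 -> 1 (back edge), 2 -> 3.
  std::vector<std::vector<int>> Preds = {{}, {0, 2}, {1}, {2}};
  std::vector<std::vector<int>> Succs = {{1}, {2}, {1, 3}, {}};
  // Location IDs generated locally in each block (stand-ins for DBG_VALUEs).
  std::vector<std::set<int>> Gen = {{10}, {11}, {}, {}};
  std::vector<std::set<int>> Out(4); // OutLocs

  // Blocks are assumed to be numbered in reverse post-order already.
  using MinQueue =
      std::priority_queue<int, std::vector<int>, std::greater<int>>;
  MinQueue Worklist, Pending;
  for (int B = 0; B < 4; ++B)
    Worklist.push(B);

  while (!Worklist.empty() || !Pending.empty()) {
    while (!Worklist.empty()) {
      int B = Worklist.top();
      Worklist.pop();
      // join(): the in-set is the union of all predecessor out-sets.
      std::set<int> In;
      for (int P : Preds[B])
        In.insert(Out[P].begin(), Out[P].end());
      // process(): propagate the in-set plus locally generated locations.
      std::set<int> NewOut = In;
      NewOut.insert(Gen[B].begin(), Gen[B].end());
      if (NewOut != Out[B]) {
        Out[B] = NewOut;
        // The out-set grew, so successors must be revisited next round.
        for (int S : Succs[B])
          Pending.push(S);
      }
    }
    std::swap(Worklist, Pending);
  }

  for (int B = 0; B < 4; ++B)
    std::printf("Out[%d] has %zu location(s)\n", B, Out[B].size());
  return 0;
}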
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
index 4ffcffcea693..3ff03ec4a7ee 100644
--- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -44,6 +44,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
@@ -223,7 +224,12 @@ public:
return L1;
}
- /// getLocationNo - Return the location number that matches Loc.
+ /// Return the location number that matches Loc.
+ ///
+ /// For undef values we always return location number UndefLocNo without
+ /// inserting anything in locations. Since locations is a vector and the
+ /// location number is the position in the vector and UndefLocNo is ~0,
+ /// we would need a very big vector to put the value at the right position.
unsigned getLocationNo(const MachineOperand &LocMO) {
if (LocMO.isReg()) {
if (LocMO.getReg() == 0)
@@ -301,7 +307,7 @@ public:
/// splitRegister - Replace OldReg ranges with NewRegs ranges where NewRegs is
/// live. Returns true if any changes were made.
- bool splitRegister(unsigned OldLocNo, ArrayRef<unsigned> NewRegs,
+ bool splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs,
LiveIntervals &LIS);
/// rewriteLocations - Rewrite virtual register locations according to the
@@ -510,7 +516,7 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
if (MI.getNumOperands() != 4 ||
!(MI.getOperand(1).isReg() || MI.getOperand(1).isImm()) ||
!MI.getOperand(2).isMetadata()) {
- DEBUG(dbgs() << "Can't handle " << MI);
+ LLVM_DEBUG(dbgs() << "Can't handle " << MI);
return false;
}
@@ -529,8 +535,8 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
// The DBG_VALUE is described by a virtual register that does not have a
// live interval. Discard the DBG_VALUE.
Discard = true;
- DEBUG(dbgs() << "Discarding debug info (no LIS interval): "
- << Idx << " " << MI);
+ LLVM_DEBUG(dbgs() << "Discarding debug info (no LIS interval): " << Idx
+ << " " << MI);
} else {
// The DBG_VALUE is only valid if either Reg is live out from Idx, or Reg
// is defined dead at Idx (where Idx is the slot index for the instruction
@@ -541,8 +547,8 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
// We have found a DBG_VALUE with the value in a virtual register that
// is not live. Discard the DBG_VALUE.
Discard = true;
- DEBUG(dbgs() << "Discarding debug info (reg not live): "
- << Idx << " " << MI);
+ LLVM_DEBUG(dbgs() << "Discarding debug info (reg not live): " << Idx
+ << " " << MI);
}
}
}
@@ -687,7 +693,8 @@ void UserValue::addDefsFromCopies(
if (CopyValues.empty())
return;
- DEBUG(dbgs() << "Got " << CopyValues.size() << " copies of " << *LI << '\n');
+ LLVM_DEBUG(dbgs() << "Got " << CopyValues.size() << " copies of " << *LI
+ << '\n');
// Try to add defs of the copied values for each kill point.
for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
@@ -701,8 +708,8 @@ void UserValue::addDefsFromCopies(
LocMap::iterator I = locInts.find(Idx);
if (I.valid() && I.start() <= Idx)
continue;
- DEBUG(dbgs() << "Kill at " << Idx << " covered by valno #"
- << DstVNI->id << " in " << *DstLI << '\n');
+ LLVM_DEBUG(dbgs() << "Kill at " << Idx << " covered by valno #"
+ << DstVNI->id << " in " << *DstLI << '\n');
MachineInstr *CopyMI = LIS.getInstructionFromIndex(DstVNI->def);
assert(CopyMI && CopyMI->isCopy() && "Bad copy value");
unsigned LocNo = getLocationNo(CopyMI->getOperand(0));
@@ -759,13 +766,6 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
// function).
}
- // Erase all the undefs.
- for (LocMap::iterator I = locInts.begin(); I.valid();)
- if (I.value().isUndef())
- I.erase();
- else
- ++I;
-
// The computed intervals may extend beyond the range of the debug
// location's lexical scope. In this case, splitting of an interval
// can result in an interval outside of the scope being created,
@@ -850,12 +850,12 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
LIS = &pass.getAnalysis<LiveIntervals>();
TRI = mf.getSubtarget().getRegisterInfo();
- DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: "
- << mf.getName() << " **********\n");
+ LLVM_DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: "
+ << mf.getName() << " **********\n");
bool Changed = collectDebugValues(mf);
computeIntervals();
- DEBUG(print(dbgs()));
+ LLVM_DEBUG(print(dbgs()));
ModifiedMF = Changed;
return Changed;
}
@@ -901,7 +901,7 @@ LiveDebugVariables::~LiveDebugVariables() {
bool
UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs,
LiveIntervals& LIS) {
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "Splitting Loc" << OldLocNo << '\t';
print(dbgs(), nullptr);
});
@@ -984,17 +984,22 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs,
while (LocMapI.valid()) {
DbgValueLocation v = LocMapI.value();
if (v.locNo() == OldLocNo) {
- DEBUG(dbgs() << "Erasing [" << LocMapI.start() << ';'
- << LocMapI.stop() << ")\n");
+ LLVM_DEBUG(dbgs() << "Erasing [" << LocMapI.start() << ';'
+ << LocMapI.stop() << ")\n");
LocMapI.erase();
} else {
- if (v.locNo() > OldLocNo)
+ // Undef values always have location number UndefLocNo, so don't change
+ // locNo in that case. See getLocationNo().
+ if (!v.isUndef() && v.locNo() > OldLocNo)
LocMapI.setValueUnchecked(v.changeLocNo(v.locNo() - 1));
++LocMapI;
}
}
- DEBUG({dbgs() << "Split result: \t"; print(dbgs(), nullptr);});
+ LLVM_DEBUG({
+ dbgs() << "Split result: \t";
+ print(dbgs(), nullptr);
+ });
return DidChange;
}
@@ -1094,6 +1099,10 @@ void UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI,
// physical register.
for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) {
DbgValueLocation Loc = I.value();
+ // Undef values don't exist in locations (and thus not in LocNoMap either)
+ // so skip over them. See getLocationNo().
+ if (Loc.isUndef())
+ continue;
unsigned NewLocNo = LocNoMap[Loc.locNo()];
I.setValueUnchecked(Loc.changeLocNo(NewLocNo));
I.setStart(I.start());
@@ -1136,7 +1145,7 @@ findNextInsertLocation(MachineBasicBlock *MBB,
unsigned Reg = LocMO.getReg();
// Find the next instruction in the MBB that defines the register Reg.
- while (I != MBB->end()) {
+ while (I != MBB->end() && !I->isTerminator()) {
if (!LIS.isNotInMIMap(*I) &&
SlotIndex::isEarlierEqualInstr(StopIdx, LIS.getInstructionIndex(*I)))
break;
@@ -1158,7 +1167,15 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
// Only search within the current MBB.
StopIdx = (MBBEndIdx < StopIdx) ? MBBEndIdx : StopIdx;
MachineBasicBlock::iterator I = findInsertLocation(MBB, StartIdx, LIS);
- MachineOperand &MO = locations[Loc.locNo()];
+ // Undef values don't exist in locations so create new "noreg" register MOs
+ // for them. See getLocationNo().
+ MachineOperand MO = !Loc.isUndef() ?
+ locations[Loc.locNo()] :
+ MachineOperand::CreateReg(/* Reg */ 0, /* isDef */ false, /* isImp */ false,
+ /* isKill */ false, /* isDead */ false,
+ /* isUndef */ false, /* isEarlyClobber */ false,
+ /* SubReg */ 0, /* isDebug */ true);
+
++NumInsertedDebugValues;
assert(cast<DILocalVariable>(Variable)
@@ -1179,14 +1196,8 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
assert((!Spilled || MO.isFI()) && "a spilled location must be a frame index");
do {
- MachineInstrBuilder MIB =
- BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE))
- .add(MO);
- if (IsIndirect)
- MIB.addImm(0U);
- else
- MIB.addReg(0U, RegState::Debug);
- MIB.addMetadata(Variable).addMetadata(Expr);
+ BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE),
+ IsIndirect, MO, Variable, Expr);
// Continue and insert DBG_VALUES after every redefinition of register
// associated with the debug value within the range
@@ -1212,11 +1223,11 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
if (trimmedDefs.count(Start))
Start = Start.getPrevIndex();
- DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << Loc.locNo());
+ LLVM_DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << Loc.locNo());
MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start)->getIterator();
SlotIndex MBBEnd = LIS.getMBBEndIdx(&*MBB);
- DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd);
+ LLVM_DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd);
insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, LIS, TII, TRI);
// This interval may span multiple basic blocks.
// Insert a DBG_VALUE into each one.
@@ -1226,10 +1237,10 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
if (++MBB == MFEnd)
break;
MBBEnd = LIS.getMBBEndIdx(&*MBB);
- DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd);
+ LLVM_DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd);
insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, LIS, TII, TRI);
}
- DEBUG(dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << '\n');
if (MBB == MFEnd)
break;
@@ -1238,13 +1249,13 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
}
void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
- DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n");
+ LLVM_DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n");
if (!MF)
return;
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
BitVector SpilledLocations;
for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
- DEBUG(userValues[i]->print(dbgs(), TRI));
+ LLVM_DEBUG(userValues[i]->print(dbgs(), TRI));
userValues[i]->rewriteLocations(*VRM, *TRI, SpilledLocations);
userValues[i]->emitDebugValues(VRM, *LIS, *TII, *TRI, SpilledLocations);
}
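As a side note on the UndefLocNo comments in the hunks above, the convention can be shown with a small self-contained sketch. The names below are hypothetical stand-ins, not the LLVM classes: undef operands never get a slot in the locations vector, so any later renumbering of location numbers has to leave the sentinel alone.

#include <cstdio>
#include <vector>

static const unsigned UndefLocNo = ~0U; // sentinel, never a vector index

struct Loc {
  bool IsUndef;
  int Reg;
};

// Hypothetical stand-in for getLocationNo(): undef values are never inserted
// into Locations; they just get the sentinel back.
unsigned getLocNo(std::vector<Loc> &Locations, const Loc &L) {
  if (L.IsUndef)
    return UndefLocNo;
  for (unsigned I = 0, E = Locations.size(); I != E; ++I)
    if (Locations[I].Reg == L.Reg)
      return I;
  Locations.push_back(L);
  return Locations.size() - 1;
}

int main() {
  std::vector<Loc> Locations;
  unsigned A = getLocNo(Locations, {false, 5}); // 0
  unsigned U = getLocNo(Locations, {true, 0});  // UndefLocNo, nothing inserted
  unsigned B = getLocNo(Locations, {false, 7}); // 1
  // If location A is erased and the remaining numbers are shifted down, the
  // undef number must be skipped or it would wrap around.
  unsigned Erased = A;
  unsigned *Uses[] = {&U, &B};
  for (unsigned *N : Uses)
    if (*N != UndefLocNo && *N > Erased)
      --*N;
  std::printf("U=%u B=%u\n", U, B);
  return 0;
}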
diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
index 302c75133e35..83dd982587c6 100644
--- a/contrib/llvm/lib/CodeGen/LiveInterval.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
@@ -33,6 +33,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
@@ -991,6 +992,7 @@ void LiveInterval::print(raw_ostream &OS) const {
// Print subranges
for (const SubRange &SR : subranges())
OS << SR;
+ OS << " weight:" << weight;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp
index 3e742a6c2f21..36428e0335f9 100644
--- a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp
@@ -187,7 +187,7 @@ void LiveIntervalUnion::Array::init(LiveIntervalUnion::Allocator &Alloc,
clear();
Size = NSize;
LIUs = static_cast<LiveIntervalUnion*>(
- malloc(sizeof(LiveIntervalUnion)*NSize));
+ safe_malloc(sizeof(LiveIntervalUnion)*NSize));
for (unsigned i = 0; i != Size; ++i)
new(LIUs + i) LiveIntervalUnion(Alloc);
}
diff --git a/contrib/llvm/lib/CodeGen/LiveIntervals.cpp b/contrib/llvm/lib/CodeGen/LiveIntervals.cpp
index 79fdba7e062a..471775f8706b 100644
--- a/contrib/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -37,6 +37,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
@@ -147,7 +148,7 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i)
getRegUnit(i);
}
- DEBUG(dump());
+ LLVM_DEBUG(dump());
return true;
}
@@ -310,7 +311,7 @@ void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) {
/// entering the entry block or a landing pad.
void LiveIntervals::computeLiveInRegUnits() {
RegUnitRanges.resize(TRI->getNumRegUnits());
- DEBUG(dbgs() << "Computing live-in reg-units in ABI blocks.\n");
+ LLVM_DEBUG(dbgs() << "Computing live-in reg-units in ABI blocks.\n");
// Keep track of the live range sets allocated.
SmallVector<unsigned, 8> NewRanges;
@@ -323,7 +324,7 @@ void LiveIntervals::computeLiveInRegUnits() {
// Create phi-defs at Begin for all live-in registers.
SlotIndex Begin = Indexes->getMBBStartIdx(&MBB);
- DEBUG(dbgs() << Begin << "\t" << printMBBReference(MBB));
+ LLVM_DEBUG(dbgs() << Begin << "\t" << printMBBReference(MBB));
for (const auto &LI : MBB.liveins()) {
for (MCRegUnitIterator Units(LI.PhysReg, TRI); Units.isValid(); ++Units) {
unsigned Unit = *Units;
@@ -335,12 +336,12 @@ void LiveIntervals::computeLiveInRegUnits() {
}
VNInfo *VNI = LR->createDeadDef(Begin, getVNInfoAllocator());
(void)VNI;
- DEBUG(dbgs() << ' ' << printRegUnit(Unit, TRI) << '#' << VNI->id);
+ LLVM_DEBUG(dbgs() << ' ' << printRegUnit(Unit, TRI) << '#' << VNI->id);
}
}
- DEBUG(dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << '\n');
}
- DEBUG(dbgs() << "Created " << NewRanges.size() << " new intervals.\n");
+ LLVM_DEBUG(dbgs() << "Created " << NewRanges.size() << " new intervals.\n");
// Compute the 'normal' part of the ranges.
for (unsigned Unit : NewRanges)
@@ -357,26 +358,40 @@ static void createSegmentsForValues(LiveRange &LR,
}
}
-using ShrinkToUsesWorkList = SmallVector<std::pair<SlotIndex, VNInfo*>, 16>;
-
-static void extendSegmentsToUses(LiveRange &LR, const SlotIndexes &Indexes,
- ShrinkToUsesWorkList &WorkList,
- const LiveRange &OldRange) {
+void LiveIntervals::extendSegmentsToUses(LiveRange &Segments,
+ ShrinkToUsesWorkList &WorkList,
+ unsigned Reg, LaneBitmask LaneMask) {
// Keep track of the PHIs that are in use.
SmallPtrSet<VNInfo*, 8> UsedPHIs;
// Blocks that have already been added to WorkList as live-out.
SmallPtrSet<const MachineBasicBlock*, 16> LiveOut;
+ auto getSubRange = [](const LiveInterval &I, LaneBitmask M)
+ -> const LiveRange& {
+ if (M.none())
+ return I;
+ for (const LiveInterval::SubRange &SR : I.subranges()) {
+ if ((SR.LaneMask & M).any()) {
+ assert(SR.LaneMask == M && "Expecting lane masks to match exactly");
+ return SR;
+ }
+ }
+ llvm_unreachable("Subrange for mask not found");
+ };
+
+ const LiveInterval &LI = getInterval(Reg);
+ const LiveRange &OldRange = getSubRange(LI, LaneMask);
+
// Extend intervals to reach all uses in WorkList.
while (!WorkList.empty()) {
SlotIndex Idx = WorkList.back().first;
VNInfo *VNI = WorkList.back().second;
WorkList.pop_back();
- const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(Idx.getPrevSlot());
- SlotIndex BlockStart = Indexes.getMBBStartIdx(MBB);
+ const MachineBasicBlock *MBB = Indexes->getMBBFromIndex(Idx.getPrevSlot());
+ SlotIndex BlockStart = Indexes->getMBBStartIdx(MBB);
// Extend the live range for VNI to be live at Idx.
- if (VNInfo *ExtVNI = LR.extendInBlock(BlockStart, Idx)) {
+ if (VNInfo *ExtVNI = Segments.extendInBlock(BlockStart, Idx)) {
assert(ExtVNI == VNI && "Unexpected existing value number");
(void)ExtVNI;
// Is this a PHIDef we haven't seen before?
@@ -387,7 +402,7 @@ static void extendSegmentsToUses(LiveRange &LR, const SlotIndexes &Indexes,
for (const MachineBasicBlock *Pred : MBB->predecessors()) {
if (!LiveOut.insert(Pred).second)
continue;
- SlotIndex Stop = Indexes.getMBBEndIdx(Pred);
+ SlotIndex Stop = Indexes->getMBBEndIdx(Pred);
// A predecessor is not required to have a live-out value for a PHI.
if (VNInfo *PVNI = OldRange.getVNInfoBefore(Stop))
WorkList.push_back(std::make_pair(Stop, PVNI));
@@ -396,24 +411,37 @@ static void extendSegmentsToUses(LiveRange &LR, const SlotIndexes &Indexes,
}
// VNI is live-in to MBB.
- DEBUG(dbgs() << " live-in at " << BlockStart << '\n');
- LR.addSegment(LiveRange::Segment(BlockStart, Idx, VNI));
+ LLVM_DEBUG(dbgs() << " live-in at " << BlockStart << '\n');
+ Segments.addSegment(LiveRange::Segment(BlockStart, Idx, VNI));
// Make sure VNI is live-out from the predecessors.
for (const MachineBasicBlock *Pred : MBB->predecessors()) {
if (!LiveOut.insert(Pred).second)
continue;
- SlotIndex Stop = Indexes.getMBBEndIdx(Pred);
- assert(OldRange.getVNInfoBefore(Stop) == VNI &&
- "Wrong value out of predecessor");
- WorkList.push_back(std::make_pair(Stop, VNI));
+ SlotIndex Stop = Indexes->getMBBEndIdx(Pred);
+ if (VNInfo *OldVNI = OldRange.getVNInfoBefore(Stop)) {
+ assert(OldVNI == VNI && "Wrong value out of predecessor");
+ (void)OldVNI;
+ WorkList.push_back(std::make_pair(Stop, VNI));
+ } else {
+#ifndef NDEBUG
+ // There was no old VNI. Verify that Stop is jointly dominated
+ // by <undef>s for this live range.
+ assert(LaneMask.any() &&
+ "Missing value out of predecessor for main range");
+ SmallVector<SlotIndex,8> Undefs;
+ LI.computeSubRangeUndefs(Undefs, LaneMask, *MRI, *Indexes);
+ assert(LiveRangeCalc::isJointlyDominated(Pred, Undefs, *Indexes) &&
+ "Missing value out of predecessor for subrange");
+#endif
+ }
}
}
}
bool LiveIntervals::shrinkToUses(LiveInterval *li,
SmallVectorImpl<MachineInstr*> *dead) {
- DEBUG(dbgs() << "Shrink: " << *li << '\n');
+ LLVM_DEBUG(dbgs() << "Shrink: " << *li << '\n');
assert(TargetRegisterInfo::isVirtualRegister(li->reg)
&& "Can only shrink virtual registers");
@@ -442,9 +470,10 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
// This shouldn't happen: readsVirtualRegister returns true, but there is
// no live value. It is likely caused by a target getting <undef> flags
// wrong.
- DEBUG(dbgs() << Idx << '\t' << UseMI
- << "Warning: Instr claims to read non-existent value in "
- << *li << '\n');
+ LLVM_DEBUG(
+ dbgs() << Idx << '\t' << UseMI
+ << "Warning: Instr claims to read non-existent value in "
+ << *li << '\n');
continue;
}
// Special case: An early-clobber tied operand reads and writes the
@@ -458,14 +487,14 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
// Create new live ranges with only minimal live segments per def.
LiveRange NewLR;
createSegmentsForValues(NewLR, make_range(li->vni_begin(), li->vni_end()));
- extendSegmentsToUses(NewLR, *Indexes, WorkList, *li);
+ extendSegmentsToUses(NewLR, WorkList, Reg, LaneBitmask::getNone());
// Move the trimmed segments back.
li->segments.swap(NewLR.segments);
// Handle dead values.
bool CanSeparate = computeDeadValues(*li, dead);
- DEBUG(dbgs() << "Shrunk: " << *li << '\n');
+ LLVM_DEBUG(dbgs() << "Shrunk: " << *li << '\n');
return CanSeparate;
}
@@ -495,7 +524,7 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI,
// This is a dead PHI. Remove it.
VNI->markUnused();
LI.removeSegment(I);
- DEBUG(dbgs() << "Dead PHI at " << Def << " may separate interval\n");
+ LLVM_DEBUG(dbgs() << "Dead PHI at " << Def << " may separate interval\n");
MayHaveSplitComponents = true;
} else {
// This is a dead def. Make sure the instruction knows.
@@ -503,7 +532,7 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI,
assert(MI && "No instruction defining live value");
MI->addRegisterDead(LI.reg, TRI);
if (dead && MI->allDefsAreDead()) {
- DEBUG(dbgs() << "All defs dead: " << Def << '\t' << *MI);
+ LLVM_DEBUG(dbgs() << "All defs dead: " << Def << '\t' << *MI);
dead->push_back(MI);
}
}
@@ -512,7 +541,7 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI,
}
void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) {
- DEBUG(dbgs() << "Shrink: " << SR << '\n');
+ LLVM_DEBUG(dbgs() << "Shrink: " << SR << '\n');
assert(TargetRegisterInfo::isVirtualRegister(Reg)
&& "Can only shrink virtual registers");
// Find all the values used, including PHI kills.
@@ -556,7 +585,7 @@ void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) {
// Create a new live ranges with only minimal live segments per def.
LiveRange NewLR;
createSegmentsForValues(NewLR, make_range(SR.vni_begin(), SR.vni_end()));
- extendSegmentsToUses(NewLR, *Indexes, WorkList, SR);
+ extendSegmentsToUses(NewLR, WorkList, Reg, SR.LaneMask);
// Move the trimmed ranges back.
SR.segments.swap(NewLR.segments);
@@ -571,13 +600,14 @@ void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) {
continue;
if (VNI->isPHIDef()) {
// This is a dead PHI. Remove it.
- DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n");
+ LLVM_DEBUG(dbgs() << "Dead PHI at " << VNI->def
+ << " may separate interval\n");
VNI->markUnused();
SR.removeSegment(*Segment);
}
}
- DEBUG(dbgs() << "Shrunk: " << SR << '\n');
+ LLVM_DEBUG(dbgs() << "Shrunk: " << SR << '\n');
}
void LiveIntervals::extendToIndices(LiveRange &LR,
@@ -785,7 +815,7 @@ MachineBasicBlock*
LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const {
// A local live range must be fully contained inside the block, meaning it is
// defined and killed at instructions, not at block boundaries. It is not
- // live in or or out of any block.
+ // live in or out of any block.
//
// It is technically possible to have a PHI-defined live range identical to a
// single block, but we are going to return false in that case.
@@ -942,7 +972,8 @@ public:
/// Update all live ranges touched by MI, assuming a move from OldIdx to
/// NewIdx.
void updateAllRanges(MachineInstr *MI) {
- DEBUG(dbgs() << "handleMove " << OldIdx << " -> " << NewIdx << ": " << *MI);
+ LLVM_DEBUG(dbgs() << "handleMove " << OldIdx << " -> " << NewIdx << ": "
+ << *MI);
bool hasRegMask = false;
for (MachineOperand &MO : MI->operands()) {
if (MO.isRegMask())
@@ -992,7 +1023,7 @@ private:
void updateRange(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask) {
if (!Updated.insert(&LR).second)
return;
- DEBUG({
+ LLVM_DEBUG({
dbgs() << " ";
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
dbgs() << printReg(Reg);
@@ -1007,7 +1038,7 @@ private:
handleMoveDown(LR);
else
handleMoveUp(LR, Reg, LaneMask);
- DEBUG(dbgs() << " -->\t" << LR << '\n');
+ LLVM_DEBUG(dbgs() << " -->\t" << LR << '\n');
LR.verify();
}
@@ -1291,6 +1322,36 @@ private:
if (OldIdxIn != E && SlotIndex::isEarlierInstr(NewIdx, OldIdxIn->end))
OldIdxIn->end = NewIdx.getRegSlot();
}
+ } else if (OldIdxIn != E
+ && SlotIndex::isEarlierInstr(NewIdxOut->start, NewIdx)
+ && SlotIndex::isEarlierInstr(NewIdx, NewIdxOut->end)) {
+ // OldIdxVNI is a dead def that has been moved into the middle of
+ // another value in LR. That can happen when LR is a whole register,
+ // but the dead def is a write to a subreg that is dead at NewIdx.
+ // The dead def may have been moved across other values
+ // in LR, so move OldIdxOut up to NewIdxOut. Slide [NewIdxOut;OldIdxOut)
+ // down one position.
+ // |- X0/NewIdxOut -| ... |- Xn-1 -| |- Xn/OldIdxOut -| |- next - |
+ // => |- X0/NewIdxOut -| |- X0 -| ... |- Xn-1 -| |- next -|
+ std::copy_backward(NewIdxOut, OldIdxOut, std::next(OldIdxOut));
+ // Modify the segment at NewIdxOut and the following segment to meet at
+ // the point of the dead def, with the following segment getting
+ // OldIdxVNI as its value number.
+ *NewIdxOut = LiveRange::Segment(
+ NewIdxOut->start, NewIdxDef.getRegSlot(), NewIdxOut->valno);
+ *(NewIdxOut + 1) = LiveRange::Segment(
+ NewIdxDef.getRegSlot(), (NewIdxOut + 1)->end, OldIdxVNI);
+ OldIdxVNI->def = NewIdxDef;
+ // Modify subsequent segments to be defined by the moved def OldIdxVNI.
+ for (auto Idx = NewIdxOut + 2; Idx <= OldIdxOut; ++Idx)
+ Idx->valno = OldIdxVNI;
+ // Aggressively remove all dead flags from the former dead definition.
+ // Kill/dead flags shouldn't be used while live intervals exist; they
+ // will be reinserted by VirtRegRewriter.
+ if (MachineInstr *KillMI = LIS.getInstructionFromIndex(NewIdx))
+ for (MIBundleOperands MO(*KillMI); MO.isValid(); ++MO)
+ if (MO->isReg() && !MO->isUse())
+ MO->setIsDead(false);
} else {
// OldIdxVNI is a dead def. It may have been moved across other values
// in LR, so move OldIdxOut up to NewIdxOut. Slide [NewIdxOut;OldIdxOut)
@@ -1360,7 +1421,7 @@ private:
MachineBasicBlock::iterator Begin = MBB->begin();
while (MII != Begin) {
- if ((--MII)->isDebugValue())
+ if ((--MII)->isDebugInstr())
continue;
SlotIndex Idx = Indexes->getInstructionIndex(*MII);
@@ -1422,7 +1483,7 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,
for (MachineBasicBlock::iterator I = End; I != Begin;) {
--I;
MachineInstr &MI = *I;
- if (MI.isDebugValue())
+ if (MI.isDebugInstr())
continue;
SlotIndex instrIdx = getInstructionIndex(MI);
@@ -1519,7 +1580,7 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
for (MachineBasicBlock::iterator I = End; I != Begin;) {
--I;
MachineInstr &MI = *I;
- if (MI.isDebugValue())
+ if (MI.isDebugInstr())
continue;
for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(),
MOE = MI.operands_end();
@@ -1580,7 +1641,7 @@ void LiveIntervals::splitSeparateComponents(LiveInterval &LI,
unsigned NumComp = ConEQ.Classify(LI);
if (NumComp <= 1)
return;
- DEBUG(dbgs() << " Split " << NumComp << " components: " << LI << '\n');
+ LLVM_DEBUG(dbgs() << " Split " << NumComp << " components: " << LI << '\n');
unsigned Reg = LI.reg;
const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);
for (unsigned I = 1; I < NumComp; ++I) {
diff --git a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp
index 277212cf7dac..86c6c8e29f9a 100644
--- a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp
+++ b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp
@@ -18,12 +18,13 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-/// \brief Remove all registers from the set that get clobbered by the register
+/// Remove all registers from the set that get clobbered by the register
/// mask.
/// The clobbers set will be the list of live registers clobbered
/// by the regmask.
@@ -44,7 +45,7 @@ void LivePhysRegs::removeRegsInMask(const MachineOperand &MO,
void LivePhysRegs::removeDefs(const MachineInstr &MI) {
for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
if (O->isReg()) {
- if (!O->isDef())
+ if (!O->isDef() || O->isDebug())
continue;
unsigned Reg = O->getReg();
if (!TargetRegisterInfo::isPhysicalRegister(Reg))
@@ -58,7 +59,7 @@ void LivePhysRegs::removeDefs(const MachineInstr &MI) {
/// Add uses to the set.
void LivePhysRegs::addUses(const MachineInstr &MI) {
for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
- if (!O->isReg() || !O->readsReg())
+ if (!O->isReg() || !O->readsReg() || O->isDebug())
continue;
unsigned Reg = O->getReg();
if (!TargetRegisterInfo::isPhysicalRegister(Reg))
@@ -85,7 +86,7 @@ void LivePhysRegs::stepForward(const MachineInstr &MI,
SmallVectorImpl<std::pair<unsigned, const MachineOperand*>> &Clobbers) {
// Remove killed registers from the set.
for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
- if (O->isReg()) {
+ if (O->isReg() && !O->isDebug()) {
unsigned Reg = O->getReg();
if (!TargetRegisterInfo::isPhysicalRegister(Reg))
continue;
@@ -105,9 +106,13 @@ void LivePhysRegs::stepForward(const MachineInstr &MI,
// Add defs to the set.
for (auto Reg : Clobbers) {
- // Skip dead defs. They shouldn't be added to the set.
+ // Skip dead defs and registers clobbered by regmasks. They shouldn't
+ // be added to the set.
if (Reg.second->isReg() && Reg.second->isDead())
continue;
+ if (Reg.second->isRegMask() &&
+ MachineOperand::clobbersPhysReg(Reg.second->getRegMask(), Reg.first))
+ continue;
addReg(Reg.first);
}
}
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
index 66c23b7b69ce..04324943dfad 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
@@ -584,3 +584,24 @@ void LiveRangeCalc::updateSSA() {
}
} while (Changed);
}
+
+bool LiveRangeCalc::isJointlyDominated(const MachineBasicBlock *MBB,
+ ArrayRef<SlotIndex> Defs,
+ const SlotIndexes &Indexes) {
+ const MachineFunction &MF = *MBB->getParent();
+ BitVector DefBlocks(MF.getNumBlockIDs());
+ for (SlotIndex I : Defs)
+ DefBlocks.set(Indexes.getMBBFromIndex(I)->getNumber());
+
+ SetVector<unsigned> PredQueue;
+ PredQueue.insert(MBB->getNumber());
+ for (unsigned i = 0; i != PredQueue.size(); ++i) {
+ unsigned BN = PredQueue[i];
+ if (DefBlocks[BN])
+ return true;
+ const MachineBasicBlock *B = MF.getBlockNumbered(BN);
+ for (const MachineBasicBlock *P : B->predecessors())
+ PredQueue.insert(P->getNumber());
+ }
+ return false;
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h
index c4914f23f56d..9f226b154a67 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h
+++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h
@@ -147,7 +147,7 @@ class LiveRangeCalc {
///
/// PhysReg, when set, is used to verify live-in lists on basic blocks.
bool findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
- SlotIndex Kill, unsigned PhysReg,
+ SlotIndex Use, unsigned PhysReg,
ArrayRef<SlotIndex> Undefs);
/// updateSSA - Compute the values that will be live in to all requested
@@ -282,6 +282,15 @@ public:
/// Every predecessor of a live-in block must have been given a value with
/// setLiveOutValue, the value may be null for live-trough blocks.
void calculateValues();
+
+ /// A diagnostic function to check if the end of the block @p MBB is
+ /// jointly dominated by the blocks corresponding to the slot indices
+ /// in @p Defs. This function is mainly for use in self-verification
+ /// checks.
+ LLVM_ATTRIBUTE_UNUSED
+ static bool isJointlyDominated(const MachineBasicBlock *MBB,
+ ArrayRef<SlotIndex> Defs,
+ const SlotIndexes &Indexes);
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
index 86cfbd87f5b1..8dfe8b68c3af 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -31,21 +31,24 @@ STATISTIC(NumFracRanges, "Number of live ranges fractured by DCE");
void LiveRangeEdit::Delegate::anchor() { }
-LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg) {
+LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg,
+ bool createSubRanges) {
unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
- if (VRM) {
+ if (VRM)
VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
- }
+
LiveInterval &LI = LIS.createEmptyInterval(VReg);
if (Parent && !Parent->isSpillable())
LI.markNotSpillable();
- // Create empty subranges if the OldReg's interval has them. Do not create
- // the main range here---it will be constructed later after the subranges
- // have been finalized.
- LiveInterval &OldLI = LIS.getInterval(OldReg);
- VNInfo::Allocator &Alloc = LIS.getVNInfoAllocator();
- for (LiveInterval::SubRange &S : OldLI.subranges())
- LI.createSubRange(Alloc, S.LaneMask);
+ if (createSubRanges) {
+ // Create empty subranges if the OldReg's interval has them. Do not create
+ // the main range here---it will be constructed later after the subranges
+ // have been finalized.
+ LiveInterval &OldLI = LIS.getInterval(OldReg);
+ VNInfo::Allocator &Alloc = LIS.getVNInfoAllocator();
+ for (LiveInterval::SubRange &S : OldLI.subranges())
+ LI.createSubRange(Alloc, S.LaneMask);
+ }
return LI;
}
@@ -217,8 +220,8 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
if (!DefMI->isSafeToMove(nullptr, SawStore))
return false;
- DEBUG(dbgs() << "Try to fold single def: " << *DefMI
- << " into single use: " << *UseMI);
+ LLVM_DEBUG(dbgs() << "Try to fold single def: " << *DefMI
+ << " into single use: " << *UseMI);
SmallVector<unsigned, 8> Ops;
if (UseMI->readsWritesVirtualRegister(LI->reg, &Ops).second)
@@ -227,7 +230,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
MachineInstr *FoldMI = TII.foldMemoryOperand(*UseMI, Ops, *DefMI, &LIS);
if (!FoldMI)
return false;
- DEBUG(dbgs() << " folded: " << *FoldMI);
+ LLVM_DEBUG(dbgs() << " folded: " << *FoldMI);
LIS.ReplaceMachineInstrInMaps(*UseMI, *FoldMI);
UseMI->eraseFromParent();
DefMI->addRegisterDead(LI->reg, nullptr);
@@ -264,18 +267,18 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
}
// Never delete inline asm.
if (MI->isInlineAsm()) {
- DEBUG(dbgs() << "Won't delete: " << Idx << '\t' << *MI);
+ LLVM_DEBUG(dbgs() << "Won't delete: " << Idx << '\t' << *MI);
return;
}
// Use the same criteria as DeadMachineInstructionElim.
bool SawStore = false;
if (!MI->isSafeToMove(nullptr, SawStore)) {
- DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI);
+ LLVM_DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI);
return;
}
- DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI);
+ LLVM_DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI);
// Collect virtual registers to be erased after MI is gone.
SmallVector<unsigned, 8> RegsToErase;
@@ -349,7 +352,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
continue;
MI->RemoveOperand(i-1);
}
- DEBUG(dbgs() << "Converted physregs to:\t" << *MI);
+ LLVM_DEBUG(dbgs() << "Converted physregs to:\t" << *MI);
} else {
// If the dest of MI is an original reg and MI is reMaterializable,
// don't delete the inst. Replace the dest with a new reg, and keep
@@ -357,12 +360,11 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
// LiveRangeEdit::DeadRemats and will be deleted after all the
// allocations of the func are done.
if (isOrigDef && DeadRemats && TII.isTriviallyReMaterializable(*MI, AA)) {
- LiveInterval &NewLI = createEmptyIntervalFrom(Dest);
- NewLI.removeEmptySubRanges();
+ LiveInterval &NewLI = createEmptyIntervalFrom(Dest, false);
VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator());
NewLI.addSegment(LiveInterval::Segment(Idx, Idx.getDeadSlot(), VNI));
pop_back();
- markDeadRemat(MI);
+ DeadRemats->insert(MI);
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
MI->substituteRegister(Dest, NewLI.reg, 0, TRI);
MI->getOperand(0).setIsDead(true);
@@ -463,7 +465,7 @@ LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF,
for (unsigned I = 0, Size = size(); I < Size; ++I) {
LiveInterval &LI = LIS.getInterval(get(I));
if (MRI.recomputeRegClass(LI.reg))
- DEBUG({
+ LLVM_DEBUG({
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
dbgs() << "Inflated " << printReg(LI.reg) << " to "
<< TRI->getRegClassName(MRI.getRegClass(LI.reg)) << '\n';
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeShrink.cpp b/contrib/llvm/lib/CodeGen/LiveRangeShrink.cpp
index 02e1f3b01ade..f75d513c89f5 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeShrink.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRangeShrink.cpp
@@ -111,7 +111,7 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
- DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
+ LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
InstOrderMap IOM;
// Map from register to instruction order (value of IOM) where the
@@ -130,7 +130,7 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock::iterator Next = MBB.begin(); Next != MBB.end();) {
MachineInstr &MI = *Next;
++Next;
- if (MI.isPHI() || MI.isDebugValue())
+ if (MI.isPHI() || MI.isDebugInstr())
continue;
if (MI.mayStore())
SawStore = true;
@@ -218,7 +218,7 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
if (DefMO && Insert && NumEligibleUse > 1 && Barrier <= IOM[Insert]) {
MachineBasicBlock::iterator I = std::next(Insert->getIterator());
// Skip all the PHI and debug instructions.
- while (I != MBB.end() && (I->isPHI() || I->isDebugValue()))
+ while (I != MBB.end() && (I->isPHI() || I->isDebugInstr()))
I = std::next(I);
if (I == MI.getIterator())
continue;
diff --git a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp
index bd435968296d..e72977b02675 100644
--- a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp
@@ -102,37 +102,37 @@ static bool foreachUnit(const TargetRegisterInfo *TRI,
}
void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) {
- DEBUG(dbgs() << "assigning " << printReg(VirtReg.reg, TRI)
- << " to " << printReg(PhysReg, TRI) << ':');
+ LLVM_DEBUG(dbgs() << "assigning " << printReg(VirtReg.reg, TRI) << " to "
+ << printReg(PhysReg, TRI) << ':');
assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment");
VRM->assignVirt2Phys(VirtReg.reg, PhysReg);
- foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit,
- const LiveRange &Range) {
- DEBUG(dbgs() << ' ' << printRegUnit(Unit, TRI) << ' ' << Range);
- Matrix[Unit].unify(VirtReg, Range);
- return false;
- });
+ foreachUnit(
+ TRI, VirtReg, PhysReg, [&](unsigned Unit, const LiveRange &Range) {
+ LLVM_DEBUG(dbgs() << ' ' << printRegUnit(Unit, TRI) << ' ' << Range);
+ Matrix[Unit].unify(VirtReg, Range);
+ return false;
+ });
++NumAssigned;
- DEBUG(dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << '\n');
}
void LiveRegMatrix::unassign(LiveInterval &VirtReg) {
unsigned PhysReg = VRM->getPhys(VirtReg.reg);
- DEBUG(dbgs() << "unassigning " << printReg(VirtReg.reg, TRI)
- << " from " << printReg(PhysReg, TRI) << ':');
+ LLVM_DEBUG(dbgs() << "unassigning " << printReg(VirtReg.reg, TRI) << " from "
+ << printReg(PhysReg, TRI) << ':');
VRM->clearVirt(VirtReg.reg);
- foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit,
- const LiveRange &Range) {
- DEBUG(dbgs() << ' ' << printRegUnit(Unit, TRI));
- Matrix[Unit].extract(VirtReg, Range);
- return false;
- });
+ foreachUnit(TRI, VirtReg, PhysReg,
+ [&](unsigned Unit, const LiveRange &Range) {
+ LLVM_DEBUG(dbgs() << ' ' << printRegUnit(Unit, TRI));
+ Matrix[Unit].extract(VirtReg, Range);
+ return false;
+ });
++NumUnassigned;
- DEBUG(dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << '\n');
}
bool LiveRegMatrix::isPhysRegUsed(unsigned PhysReg) const {
@@ -205,3 +205,19 @@ LiveRegMatrix::checkInterference(LiveInterval &VirtReg, unsigned PhysReg) {
return IK_Free;
}
+
+bool LiveRegMatrix::checkInterference(SlotIndex Start, SlotIndex End,
+ unsigned PhysReg) {
+ // Construct an artificial live range containing only one segment [Start, End).
+ VNInfo valno(0, Start);
+ LiveRange::Segment Seg(Start, End, &valno);
+ LiveRange LR;
+ LR.addSegment(Seg);
+
+ // Check for interference with that segment
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ if (query(LR, *Units).checkInterference())
+ return true;
+ }
+ return false;
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveRegUnits.cpp b/contrib/llvm/lib/CodeGen/LiveRegUnits.cpp
index 9f28db6287ba..c22681385492 100644
--- a/contrib/llvm/lib/CodeGen/LiveRegUnits.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRegUnits.cpp
@@ -46,7 +46,7 @@ void LiveRegUnits::stepBackward(const MachineInstr &MI) {
// Remove defined registers and regmask kills from the set.
for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
if (O->isReg()) {
- if (!O->isDef())
+ if (!O->isDef() || O->isDebug())
continue;
unsigned Reg = O->getReg();
if (!TargetRegisterInfo::isPhysicalRegister(Reg))
@@ -58,7 +58,7 @@ void LiveRegUnits::stepBackward(const MachineInstr &MI) {
// Add uses to the set.
for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
- if (!O->isReg() || !O->readsReg())
+ if (!O->isReg() || !O->readsReg() || O->isDebug())
continue;
unsigned Reg = O->getReg();
if (!TargetRegisterInfo::isPhysicalRegister(Reg))
diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
index 032dd66ae1d2..0b92eab83806 100644
--- a/contrib/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
@@ -34,6 +34,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -498,7 +499,7 @@ void LiveVariables::UpdatePhysRegDefs(MachineInstr &MI,
void LiveVariables::runOnInstr(MachineInstr &MI,
SmallVectorImpl<unsigned> &Defs) {
- assert(!MI.isDebugValue());
+ assert(!MI.isDebugInstr());
// Process all of the operands of the instruction...
unsigned NumOperandsToProcess = MI.getNumOperands();
@@ -575,7 +576,7 @@ void LiveVariables::runOnBlock(MachineBasicBlock *MBB, const unsigned NumRegs) {
DistanceMap.clear();
unsigned Dist = 0;
for (MachineInstr &MI : *MBB) {
- if (MI.isDebugValue())
+ if (MI.isDebugInstr())
continue;
DistanceMap.insert(std::make_pair(&MI, Dist++));
diff --git a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index c0da37ede849..f90ce0c8cd2a 100644
--- a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -25,7 +25,6 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/StackProtector.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -99,7 +98,6 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addRequired<StackProtector>();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
@@ -109,12 +107,8 @@ namespace {
char LocalStackSlotPass::ID = 0;
char &llvm::LocalStackSlotAllocationID = LocalStackSlotPass::ID;
-
-INITIALIZE_PASS_BEGIN(LocalStackSlotPass, DEBUG_TYPE,
- "Local Stack Slot Allocation", false, false)
-INITIALIZE_PASS_DEPENDENCY(StackProtector)
-INITIALIZE_PASS_END(LocalStackSlotPass, DEBUG_TYPE,
- "Local Stack Slot Allocation", false, false)
+INITIALIZE_PASS(LocalStackSlotPass, DEBUG_TYPE,
+ "Local Stack Slot Allocation", false, false)
bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -164,8 +158,8 @@ void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo &MFI,
Offset = (Offset + Align - 1) / Align * Align;
int64_t LocalOffset = StackGrowsDown ? -Offset : Offset;
- DEBUG(dbgs() << "Allocate FI(" << FrameIdx << ") to local offset "
- << LocalOffset << "\n");
+ LLVM_DEBUG(dbgs() << "Allocate FI(" << FrameIdx << ") to local offset "
+ << LocalOffset << "\n");
// Keep the offset available for base register allocation
LocalOffsets[FrameIdx] = LocalOffset;
// And tell MFI about it for PEI to use later
@@ -202,7 +196,6 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
int64_t Offset = 0;
unsigned MaxAlign = 0;
- StackProtector *SP = &getAnalysis<StackProtector>();
// Make sure that the stack protector comes before the local variables on the
// stack.
@@ -222,16 +215,16 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
if (MFI.getStackProtectorIndex() == (int)i)
continue;
- switch (SP->getSSPLayout(MFI.getObjectAllocation(i))) {
- case StackProtector::SSPLK_None:
+ switch (MFI.getObjectSSPLayout(i)) {
+ case MachineFrameInfo::SSPLK_None:
continue;
- case StackProtector::SSPLK_SmallArray:
+ case MachineFrameInfo::SSPLK_SmallArray:
SmallArrayObjs.insert(i);
continue;
- case StackProtector::SSPLK_AddrOf:
+ case MachineFrameInfo::SSPLK_AddrOf:
AddrOfObjs.insert(i);
continue;
- case StackProtector::SSPLK_LargeArray:
+ case MachineFrameInfo::SSPLK_LargeArray:
LargeArrayObjs.insert(i);
continue;
}
@@ -304,7 +297,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
for (MachineInstr &MI : BB) {
// Debug value, stackmap and patchpoint instructions can't be out of
// range, so they don't need any updates.
- if (MI.isDebugValue() || MI.getOpcode() == TargetOpcode::STATEPOINT ||
+ if (MI.isDebugInstr() || MI.getOpcode() == TargetOpcode::STATEPOINT ||
MI.getOpcode() == TargetOpcode::STACKMAP ||
MI.getOpcode() == TargetOpcode::PATCHPOINT)
continue;
@@ -335,7 +328,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// Sort the frame references by local offset.
// Use frame index as a tie-breaker in case MI's have the same offset.
- std::sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end());
+ llvm::sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end());
MachineBasicBlock *Entry = &Fn.front();
@@ -351,7 +344,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
assert(MFI.isObjectPreAllocated(FrameIdx) &&
"Only pre-allocated locals expected!");
- DEBUG(dbgs() << "Considering: " << MI);
+ LLVM_DEBUG(dbgs() << "Considering: " << MI);
unsigned idx = 0;
for (unsigned f = MI.getNumOperands(); idx != f; ++idx) {
@@ -367,7 +360,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
int64_t Offset = 0;
int64_t FrameSizeAdjust = StackGrowsDown ? MFI.getLocalFrameSize() : 0;
- DEBUG(dbgs() << " Replacing FI in: " << MI);
+ LLVM_DEBUG(dbgs() << " Replacing FI in: " << MI);
// If we have a suitable base register available, use it; otherwise
// create a new one. Note that any offset encoded in the
@@ -377,7 +370,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
if (UsedBaseReg &&
lookupCandidateBaseReg(BaseReg, BaseOffset, FrameSizeAdjust,
LocalOffset, MI, TRI)) {
- DEBUG(dbgs() << " Reusing base register " << BaseReg << "\n");
+ LLVM_DEBUG(dbgs() << " Reusing base register " << BaseReg << "\n");
// We found a register to reuse.
Offset = FrameSizeAdjust + LocalOffset - BaseOffset;
} else {
@@ -405,8 +398,9 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF);
BaseReg = Fn.getRegInfo().createVirtualRegister(RC);
- DEBUG(dbgs() << " Materializing base register " << BaseReg <<
- " at frame local offset " << LocalOffset + InstrOffset << "\n");
+ LLVM_DEBUG(dbgs() << " Materializing base register " << BaseReg
+ << " at frame local offset "
+ << LocalOffset + InstrOffset << "\n");
// Tell the target to insert the instruction to initialize
// the base register.
@@ -427,7 +421,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// Modify the instruction to use the new base register rather
// than the frame index operand.
TRI->resolveFrameIndex(MI, BaseReg, Offset);
- DEBUG(dbgs() << "Resolved: " << MI);
+ LLVM_DEBUG(dbgs() << "Resolved: " << MI);
++NumReplacements;
}
diff --git a/contrib/llvm/lib/CodeGen/LoopTraversal.cpp b/contrib/llvm/lib/CodeGen/LoopTraversal.cpp
new file mode 100644
index 000000000000..a02d10e09d7d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LoopTraversal.cpp
@@ -0,0 +1,77 @@
+//===- LoopTraversal.cpp - Optimal basic block traversal order --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LoopTraversal.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/MachineFunction.h"
+
+using namespace llvm;
+
+bool LoopTraversal::isBlockDone(MachineBasicBlock *MBB) {
+ unsigned MBBNumber = MBB->getNumber();
+ assert(MBBNumber < MBBInfos.size() && "Unexpected basic block number.");
+ return MBBInfos[MBBNumber].PrimaryCompleted &&
+ MBBInfos[MBBNumber].IncomingCompleted ==
+ MBBInfos[MBBNumber].PrimaryIncoming &&
+ MBBInfos[MBBNumber].IncomingProcessed == MBB->pred_size();
+}
+
+LoopTraversal::TraversalOrder LoopTraversal::traverse(MachineFunction &MF) {
+ // Initialize the MBBInfos.
+ MBBInfos.assign(MF.getNumBlockIDs(), MBBInfo());
+
+ MachineBasicBlock *Entry = &*MF.begin();
+ ReversePostOrderTraversal<MachineBasicBlock *> RPOT(Entry);
+ SmallVector<MachineBasicBlock *, 4> Workqueue;
+ SmallVector<TraversedMBBInfo, 4> MBBTraversalOrder;
+ for (MachineBasicBlock *MBB : RPOT) {
+ // N.B: IncomingProcessed and IncomingCompleted were already updated while
+ // processing this block's predecessors.
+ unsigned MBBNumber = MBB->getNumber();
+ assert(MBBNumber < MBBInfos.size() && "Unexpected basic block number.");
+ MBBInfos[MBBNumber].PrimaryCompleted = true;
+ MBBInfos[MBBNumber].PrimaryIncoming = MBBInfos[MBBNumber].IncomingProcessed;
+ bool Primary = true;
+ Workqueue.push_back(MBB);
+ while (!Workqueue.empty()) {
+ MachineBasicBlock *ActiveMBB = &*Workqueue.back();
+ Workqueue.pop_back();
+ bool Done = isBlockDone(ActiveMBB);
+ MBBTraversalOrder.push_back(TraversedMBBInfo(ActiveMBB, Primary, Done));
+ for (MachineBasicBlock *Succ : ActiveMBB->successors()) {
+ unsigned SuccNumber = Succ->getNumber();
+ assert(SuccNumber < MBBInfos.size() &&
+ "Unexpected basic block number.");
+ if (!isBlockDone(Succ)) {
+ if (Primary)
+ MBBInfos[SuccNumber].IncomingProcessed++;
+ if (Done)
+ MBBInfos[SuccNumber].IncomingCompleted++;
+ if (isBlockDone(Succ))
+ Workqueue.push_back(Succ);
+ }
+ }
+ Primary = false;
+ }
+ }
+
+ // We need to go through again and finalize any blocks that are not done yet.
+ // This is possible if blocks have dead predecessors, so we didn't visit them
+ // above.
+ for (MachineBasicBlock *MBB : RPOT) {
+ if (!isBlockDone(MBB))
+ MBBTraversalOrder.push_back(TraversedMBBInfo(MBB, false, true));
+ // Don't update successors here. We'll get to them anyway through this
+ // loop.
+ }
+
+ MBBInfos.clear();
+
+ return MBBTraversalOrder;
+}
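To make the traversal order above more concrete, here is a self-contained toy re-implementation of the same "primary visit, then revisit once all predecessors are done" idea on a plain integer CFG. It is an illustration under simplifying assumptions (no dead predecessors, block numbers already in reverse post-order, illustrative field names), not the LoopTraversal class itself; on the loop below, blocks 1 and 2 are visited twice, the second time with complete incoming information.

#include <cstdio>
#include <vector>

// A block is "done" when its primary visit has happened and every predecessor
// edge has been processed by a visit that was itself done.
struct Info {
  bool PrimaryCompleted = false;
  unsigned IncomingProcessed = 0; // predecessor edges seen at all
  unsigned PrimaryIncoming = 0;   // edges already seen at the primary visit
  unsigned IncomingCompleted = 0; // predecessor edges seen as "done"
};

int main() {
  // Toy CFG with a loop: 0 -> 1, 1 -> 2, 2 -> 1 (back edge), 2 -> 3.
  std::vector<std::vector<int>> Succs = {{1}, {2}, {1, 3}, {}};
  std::vector<unsigned> PredCount = {0, 2, 1, 1};
  std::vector<Info> I(4);

  auto isDone = [&](int B) {
    return I[B].PrimaryCompleted &&
           I[B].IncomingCompleted == I[B].PrimaryIncoming &&
           I[B].IncomingProcessed == PredCount[B];
  };

  std::vector<int> Workqueue;
  for (int B = 0; B < 4; ++B) { // blocks already in reverse post-order
    I[B].PrimaryCompleted = true;
    I[B].PrimaryIncoming = I[B].IncomingProcessed;
    bool Primary = true;
    Workqueue.push_back(B);
    while (!Workqueue.empty()) {
      int Active = Workqueue.back();
      Workqueue.pop_back();
      bool Done = isDone(Active);
      std::printf("visit block %d%s%s\n", Active, Primary ? " (primary)" : "",
                  Done ? " (done)" : "");
      for (int S : Succs[Active]) {
        if (isDone(S))
          continue;
        if (Primary)
          ++I[S].IncomingProcessed;
        if (Done)
          ++I[S].IncomingCompleted;
        if (isDone(S)) // all inputs complete now: revisit it on this pass
          Workqueue.push_back(S);
      }
      Primary = false;
    }
  }
  return 0;
}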
diff --git a/contrib/llvm/lib/CodeGen/LowerEmuTLS.cpp b/contrib/llvm/lib/CodeGen/LowerEmuTLS.cpp
index 0cf578b50563..36c1d358a9bd 100644
--- a/contrib/llvm/lib/CodeGen/LowerEmuTLS.cpp
+++ b/contrib/llvm/lib/CodeGen/LowerEmuTLS.cpp
@@ -68,7 +68,7 @@ bool LowerEmuTLS::runOnModule(Module &M) {
return false;
auto &TM = TPC->getTM<TargetMachine>();
- if (!TM.Options.EmulatedTLS)
+ if (!TM.useEmulatedTLS())
return false;
bool Changed = false;
diff --git a/contrib/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/contrib/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
index 4b676a60a8cd..fa43d13b1b85 100644
--- a/contrib/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
@@ -43,14 +43,13 @@ extern char &MIRCanonicalizerID;
#define DEBUG_TYPE "mir-canonicalizer"
static cl::opt<unsigned>
-CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u),
- cl::value_desc("N"),
- cl::desc("Function number to canonicalize."));
+ CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u),
+ cl::value_desc("N"),
+ cl::desc("Function number to canonicalize."));
-static cl::opt<unsigned>
-CanonicalizeBasicBlockNumber("canon-nth-basicblock", cl::Hidden, cl::init(~0u),
- cl::value_desc("N"),
- cl::desc("BasicBlock number to canonicalize."));
+static cl::opt<unsigned> CanonicalizeBasicBlockNumber(
+ "canon-nth-basicblock", cl::Hidden, cl::init(~0u), cl::value_desc("N"),
+ cl::desc("BasicBlock number to canonicalize."));
namespace {
@@ -84,9 +83,9 @@ public:
assert(type != RSE_Reg && "Expected a non-register type.");
}
- bool isReg() const { return type == RSE_Reg; }
- bool isFrameIndex() const { return type == RSE_FrameIndex; }
- bool isCandidate() const { return type == RSE_NewCandidate; }
+ bool isReg() const { return type == RSE_Reg; }
+ bool isFrameIndex() const { return type == RSE_FrameIndex; }
+ bool isCandidate() const { return type == RSE_NewCandidate; }
VRType getType() const { return type; }
unsigned getReg() const {
@@ -115,23 +114,49 @@ static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) {
return RPOList;
}
-// Set a dummy vreg. We use this vregs register class to generate throw-away
-// vregs that are used to skip vreg numbers so that vreg numbers line up.
-static unsigned GetDummyVReg(const MachineFunction &MF) {
- for (auto &MBB : MF) {
- for (auto &MI : MBB) {
- for (auto &MO : MI.operands()) {
- if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- continue;
- return MO.getReg();
- }
- }
+static bool
+rescheduleLexographically(std::vector<MachineInstr *> instructions,
+ MachineBasicBlock *MBB,
+ std::function<MachineBasicBlock::iterator()> getPos) {
+
+ bool Changed = false;
+ using StringInstrPair = std::pair<std::string, MachineInstr *>;
+ std::vector<StringInstrPair> StringInstrMap;
+
+ for (auto *II : instructions) {
+ std::string S;
+ raw_string_ostream OS(S);
+ II->print(OS);
+ OS.flush();
+
+ // Trim the assignment, or start from the beginning in the case of a store.
+ const size_t i = S.find("=");
+ StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II});
+ }
+
+ llvm::sort(StringInstrMap.begin(), StringInstrMap.end(),
+ [](const StringInstrPair &a, const StringInstrPair &b) -> bool {
+ return (a.first < b.first);
+ });
+
+ for (auto &II : StringInstrMap) {
+
+ LLVM_DEBUG({
+ dbgs() << "Splicing ";
+ II.second->dump();
+ dbgs() << " right before: ";
+ getPos()->dump();
+ });
+
+ Changed = true;
+ MBB->splice(getPos(), MBB, II.second);
}
- return ~0U;
+ return Changed;
}
-static bool rescheduleCanonically(MachineBasicBlock *MBB) {
+static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
+ MachineBasicBlock *MBB) {
bool Changed = false;
@@ -153,15 +178,62 @@ static bool rescheduleCanonically(MachineBasicBlock *MBB) {
Instructions.push_back(&MI);
}
+ std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers;
+ std::vector<MachineInstr *> PseudoIdempotentInstructions;
+ std::vector<unsigned> PhysRegDefs;
+ for (auto *II : Instructions) {
+ for (unsigned i = 1; i < II->getNumOperands(); i++) {
+ MachineOperand &MO = II->getOperand(i);
+ if (!MO.isReg())
+ continue;
+
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+
+ if (!MO.isDef())
+ continue;
+
+ PhysRegDefs.push_back(MO.getReg());
+ }
+ }
+
for (auto *II : Instructions) {
if (II->getNumOperands() == 0)
continue;
+ if (II->mayLoadOrStore())
+ continue;
MachineOperand &MO = II->getOperand(0);
if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
continue;
+ if (!MO.isDef())
+ continue;
+
+ bool IsPseudoIdempotent = true;
+ for (unsigned i = 1; i < II->getNumOperands(); i++) {
+
+ if (II->getOperand(i).isImm()) {
+ continue;
+ }
+
+ if (II->getOperand(i).isReg()) {
+ if (!TargetRegisterInfo::isVirtualRegister(II->getOperand(i).getReg()))
+ if (llvm::find(PhysRegDefs, II->getOperand(i).getReg()) ==
+ PhysRegDefs.end()) {
+ continue;
+ }
+ }
- DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump(););
+ IsPseudoIdempotent = false;
+ break;
+ }
+
+ if (IsPseudoIdempotent) {
+ PseudoIdempotentInstructions.push_back(II);
+ continue;
+ }
+
+ LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump(););
MachineInstr *Def = II;
unsigned Distance = ~0U;
@@ -194,9 +266,6 @@ static bool rescheduleCanonically(MachineBasicBlock *MBB) {
if (DefI != BBE && UseI != BBE)
break;
- if ((&*BBI != Def) && (&*BBI != UseToBringDefCloserTo))
- continue;
-
if (&*BBI == Def) {
DefI = BBI;
continue;
@@ -211,17 +280,80 @@ static bool rescheduleCanonically(MachineBasicBlock *MBB) {
if (DefI == BBE || UseI == BBE)
continue;
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "Splicing ";
DefI->dump();
dbgs() << " right before: ";
UseI->dump();
});
+ MultiUsers[UseToBringDefCloserTo].push_back(Def);
Changed = true;
MBB->splice(UseI, MBB, DefI);
}
+ // Sort the defs for users of multiple defs lexicographically.
+ for (const auto &E : MultiUsers) {
+
+ auto UseI =
+ std::find_if(MBB->instr_begin(), MBB->instr_end(),
+ [&](MachineInstr &MI) -> bool { return &MI == E.first; });
+
+ if (UseI == MBB->instr_end())
+ continue;
+
+ LLVM_DEBUG(
+ dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";);
+ Changed |= rescheduleLexographically(
+ E.second, MBB, [&]() -> MachineBasicBlock::iterator { return UseI; });
+ }
+
+ PseudoIdempotentInstCount = PseudoIdempotentInstructions.size();
+ LLVM_DEBUG(
+ dbgs() << "Rescheduling Idempotent Instructions Lexographically.";);
+ Changed |= rescheduleLexographically(
+ PseudoIdempotentInstructions, MBB,
+ [&]() -> MachineBasicBlock::iterator { return MBB->begin(); });
+
+ return Changed;
+}
+
+static bool propagateLocalCopies(MachineBasicBlock *MBB) {
+ bool Changed = false;
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+ std::vector<MachineInstr *> Copies;
+ for (MachineInstr &MI : MBB->instrs()) {
+ if (MI.isCopy())
+ Copies.push_back(&MI);
+ }
+
+ for (MachineInstr *MI : Copies) {
+
+ if (!MI->getOperand(0).isReg())
+ continue;
+ if (!MI->getOperand(1).isReg())
+ continue;
+
+ const unsigned Dst = MI->getOperand(0).getReg();
+ const unsigned Src = MI->getOperand(1).getReg();
+
+ if (!TargetRegisterInfo::isVirtualRegister(Dst))
+ continue;
+ if (!TargetRegisterInfo::isVirtualRegister(Src))
+ continue;
+ if (MRI.getRegClass(Dst) != MRI.getRegClass(Src))
+ continue;
+
+ for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) {
+ MachineOperand *MO = &*UI;
+ MO->setReg(Src);
+ Changed = true;
+ }
+
+ MI->eraseFromParent();
+ }
+
return Changed;
}
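
A rough before/after illustration of the local copy propagation above (the vreg numbers and the ADDWrr opcode are purely illustrative): a same-class virtual-register COPY is erased and every use of its destination is rewritten to the source, so

    %1 = COPY %0
    %2 = ADDWrr %1, %1

becomes

    %2 = ADDWrr %0, %0
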
@@ -245,7 +377,8 @@ static std::vector<MachineInstr *> populateCandidates(MachineBasicBlock *MBB) {
DoesMISideEffect |= !TargetRegisterInfo::isVirtualRegister(Dst);
for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) {
- if (DoesMISideEffect) break;
+ if (DoesMISideEffect)
+ break;
DoesMISideEffect |= (UI->getParent()->getParent() != MI->getParent());
}
}
@@ -253,7 +386,7 @@ static std::vector<MachineInstr *> populateCandidates(MachineBasicBlock *MBB) {
if (!MI->mayStore() && !MI->isBranch() && !DoesMISideEffect)
continue;
- DEBUG(dbgs() << "Found Candidate: "; MI->dump(););
+ LLVM_DEBUG(dbgs() << "Found Candidate: "; MI->dump(););
Candidates.push_back(MI);
}
@@ -274,7 +407,7 @@ static void doCandidateWalk(std::vector<TypedVReg> &VRegs,
RegQueue.pop();
if (TReg.isFrameIndex()) {
- DEBUG(dbgs() << "Popping frame index.\n";);
+ LLVM_DEBUG(dbgs() << "Popping frame index.\n";);
VRegs.push_back(TypedVReg(RSE_FrameIndex));
continue;
}
@@ -283,7 +416,7 @@ static void doCandidateWalk(std::vector<TypedVReg> &VRegs,
unsigned Reg = TReg.getReg();
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "Popping vreg ";
MRI.def_begin(Reg)->dump();
dbgs() << "\n";
@@ -295,7 +428,7 @@ static void doCandidateWalk(std::vector<TypedVReg> &VRegs,
VRegs.push_back(TypedVReg(Reg));
}
} else {
- DEBUG(dbgs() << "Popping physreg.\n";);
+ LLVM_DEBUG(dbgs() << "Popping physreg.\n";);
VRegs.push_back(TypedVReg(Reg));
continue;
}
@@ -311,7 +444,7 @@ static void doCandidateWalk(std::vector<TypedVReg> &VRegs,
break;
}
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "\n========================\n";
dbgs() << "Visited MI: ";
Def->dump();
@@ -323,7 +456,7 @@ static void doCandidateWalk(std::vector<TypedVReg> &VRegs,
MachineOperand &MO = Def->getOperand(I);
if (MO.isFI()) {
- DEBUG(dbgs() << "Pushing frame index.\n";);
+ LLVM_DEBUG(dbgs() << "Pushing frame index.\n";);
RegQueue.push(TypedVReg(RSE_FrameIndex));
}
@@ -335,33 +468,56 @@ static void doCandidateWalk(std::vector<TypedVReg> &VRegs,
}
}
-// TODO: Work to remove this in the future. One day when we have named vregs
-// we should be able to form the canonical name based on some characteristic
-// we see in that point of the expression tree (like if we were to name based
-// on some sort of value numbering scheme).
-static void SkipVRegs(unsigned &VRegGapIndex, MachineRegisterInfo &MRI,
- const TargetRegisterClass *RC) {
- const unsigned VR_GAP = (++VRegGapIndex * 1000);
-
- DEBUG({
- dbgs() << "Adjusting per-BB VR_GAP for BB" << VRegGapIndex << " to "
- << VR_GAP << "\n";
- });
+namespace {
+class NamedVRegCursor {
+ MachineRegisterInfo &MRI;
+ unsigned virtualVRegNumber;
+
+public:
+ NamedVRegCursor(MachineRegisterInfo &MRI) : MRI(MRI) {
+ unsigned VRegGapIndex = 0;
+ const unsigned VR_GAP = (++VRegGapIndex * 1000);
+
+ unsigned I = MRI.createIncompleteVirtualRegister();
+ const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP;
- unsigned I = MRI.createVirtualRegister(RC);
- const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP;
- while (I != E) {
- I = MRI.createVirtualRegister(RC);
+ virtualVRegNumber = E;
}
-}
+
+ void SkipVRegs() {
+ unsigned VRegGapIndex = 1;
+ const unsigned VR_GAP = (++VRegGapIndex * 1000);
+
+ unsigned I = virtualVRegNumber;
+ const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP;
+
+ virtualVRegNumber = E;
+ }
+
+ unsigned getVirtualVReg() const { return virtualVRegNumber; }
+
+ unsigned incrementVirtualVReg(unsigned incr = 1) {
+ virtualVRegNumber += incr;
+ return virtualVRegNumber;
+ }
+
+ unsigned createVirtualRegister(const TargetRegisterClass *RC) {
+ std::string S;
+ raw_string_ostream OS(S);
+ OS << "namedVReg" << (virtualVRegNumber & ~0x80000000);
+ OS.flush();
+ virtualVRegNumber++;
+
+ return MRI.createVirtualRegister(RC, OS.str());
+ }
+};
+} // namespace
static std::map<unsigned, unsigned>
GetVRegRenameMap(const std::vector<TypedVReg> &VRegs,
const std::vector<unsigned> &renamedInOtherBB,
- MachineRegisterInfo &MRI,
- const TargetRegisterClass *RC) {
+ MachineRegisterInfo &MRI, NamedVRegCursor &NVC) {
std::map<unsigned, unsigned> VRegRenameMap;
- unsigned LastRenameReg = MRI.createVirtualRegister(RC);
bool FirstCandidate = true;
for (auto &vreg : VRegs) {
@@ -370,8 +526,9 @@ GetVRegRenameMap(const std::vector<TypedVReg> &VRegs,
// (especially when comparing SelectionDAG to GlobalISel generated MIR)
// that in the other file we are just getting an incoming vreg that comes
// from a copy from a frame index. So it's safe to skip by one.
- LastRenameReg = MRI.createVirtualRegister(RC);
- DEBUG(dbgs() << "Skipping rename for FI " << LastRenameReg << "\n";);
+ unsigned LastRenameReg = NVC.incrementVirtualVReg();
+ (void)LastRenameReg;
+ LLVM_DEBUG(dbgs() << "Skipping rename for FI " << LastRenameReg << "\n";);
continue;
} else if (vreg.isCandidate()) {
@@ -380,20 +537,15 @@ GetVRegRenameMap(const std::vector<TypedVReg> &VRegs,
// same vreg number making it more likely that the canonical walk from the
// candidate instruction. We don't need to skip from the first candidate of
// the BasicBlock because we already skip ahead several vregs for each BB.
- while (LastRenameReg % 10) {
- if (!FirstCandidate) break;
- LastRenameReg = MRI.createVirtualRegister(RC);
-
- DEBUG({
- dbgs() << "Skipping rename for new candidate " << LastRenameReg
- << "\n";
- });
- }
+ unsigned LastRenameReg = NVC.getVirtualVReg();
+ if (FirstCandidate)
+ NVC.incrementVirtualVReg(LastRenameReg % 10);
FirstCandidate = false;
continue;
} else if (!TargetRegisterInfo::isVirtualRegister(vreg.getReg())) {
- LastRenameReg = MRI.createVirtualRegister(RC);
- DEBUG({
+ unsigned LastRenameReg = NVC.incrementVirtualVReg();
+ (void)LastRenameReg;
+ LLVM_DEBUG({
dbgs() << "Skipping rename for Phys Reg " << LastRenameReg << "\n";
});
continue;
@@ -401,27 +553,27 @@ GetVRegRenameMap(const std::vector<TypedVReg> &VRegs,
auto Reg = vreg.getReg();
if (llvm::find(renamedInOtherBB, Reg) != renamedInOtherBB.end()) {
- DEBUG(dbgs() << "Vreg " << Reg << " already renamed in other BB.\n";);
+ LLVM_DEBUG(dbgs() << "Vreg " << Reg
+ << " already renamed in other BB.\n";);
continue;
}
- auto Rename = MRI.createVirtualRegister(MRI.getRegClass(Reg));
- LastRenameReg = Rename;
+ auto Rename = NVC.createVirtualRegister(MRI.getRegClass(Reg));
if (VRegRenameMap.find(Reg) == VRegRenameMap.end()) {
- DEBUG(dbgs() << "Mapping vreg ";);
+ LLVM_DEBUG(dbgs() << "Mapping vreg ";);
if (MRI.reg_begin(Reg) != MRI.reg_end()) {
- DEBUG(auto foo = &*MRI.reg_begin(Reg); foo->dump(););
+ LLVM_DEBUG(auto foo = &*MRI.reg_begin(Reg); foo->dump(););
} else {
- DEBUG(dbgs() << Reg;);
+ LLVM_DEBUG(dbgs() << Reg;);
}
- DEBUG(dbgs() << " to ";);
+ LLVM_DEBUG(dbgs() << " to ";);
if (MRI.reg_begin(Rename) != MRI.reg_end()) {
- DEBUG(auto foo = &*MRI.reg_begin(Rename); foo->dump(););
+ LLVM_DEBUG(auto foo = &*MRI.reg_begin(Rename); foo->dump(););
} else {
- DEBUG(dbgs() << Rename;);
+ LLVM_DEBUG(dbgs() << Rename;);
}
- DEBUG(dbgs() << "\n";);
+ LLVM_DEBUG(dbgs() << "\n";);
VRegRenameMap.insert(std::pair<unsigned, unsigned>(Reg, Rename));
}
@@ -483,23 +635,25 @@ static bool doDefKillClear(MachineBasicBlock *MBB) {
static bool runOnBasicBlock(MachineBasicBlock *MBB,
std::vector<StringRef> &bbNames,
std::vector<unsigned> &renamedInOtherBB,
- unsigned &basicBlockNum, unsigned &VRegGapIndex) {
+ unsigned &basicBlockNum, unsigned &VRegGapIndex,
+ NamedVRegCursor &NVC) {
if (CanonicalizeBasicBlockNumber != ~0U) {
if (CanonicalizeBasicBlockNumber != basicBlockNum++)
return false;
- DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB->getName() << "\n";);
+ LLVM_DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB->getName()
+ << "\n";);
}
if (llvm::find(bbNames, MBB->getName()) != bbNames.end()) {
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "Found potentially duplicate BasicBlocks: " << MBB->getName()
<< "\n";
});
return false;
}
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n";
dbgs() << "\n\n================================================\n\n";
});
@@ -508,17 +662,18 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB,
MachineFunction &MF = *MBB->getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
- const unsigned DummyVReg = GetDummyVReg(MF);
- const TargetRegisterClass *DummyRC =
- (DummyVReg == ~0U) ? nullptr : MRI.getRegClass(DummyVReg);
- if (!DummyRC) return false;
-
bbNames.push_back(MBB->getName());
- DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";);
+ LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";);
- DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump(););
- Changed |= rescheduleCanonically(MBB);
- DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
+ LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n";
+ MBB->dump(););
+ Changed |= propagateLocalCopies(MBB);
+ LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB->dump(););
+
+ LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump(););
+ unsigned IdempotentInstCount = 0;
+ Changed |= rescheduleCanonically(IdempotentInstCount, MBB);
+ LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
std::vector<MachineInstr *> Candidates = populateCandidates(MBB);
std::vector<MachineInstr *> VisitedMIs;
@@ -543,7 +698,7 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB,
if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
continue;
- DEBUG(dbgs() << "Enqueue register"; MO.dump(); dbgs() << "\n";);
+ LLVM_DEBUG(dbgs() << "Enqueue register"; MO.dump(); dbgs() << "\n";);
RegQueue.push(TypedVReg(MO.getReg()));
}
@@ -560,10 +715,10 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB,
if (!MO.isReg() && !MO.isFI())
continue;
- DEBUG(dbgs() << "Enqueue Reg/FI"; MO.dump(); dbgs() << "\n";);
+ LLVM_DEBUG(dbgs() << "Enqueue Reg/FI"; MO.dump(); dbgs() << "\n";);
- RegQueue.push(MO.isReg() ? TypedVReg(MO.getReg()) :
- TypedVReg(RSE_FrameIndex));
+ RegQueue.push(MO.isReg() ? TypedVReg(MO.getReg())
+ : TypedVReg(RSE_FrameIndex));
}
doCandidateWalk(VRegs, RegQueue, VisitedMIs, MBB);
@@ -574,15 +729,38 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB,
if (VRegs.size() == 0)
return Changed;
- // Skip some vregs, so we can recon where we'll land next.
- SkipVRegs(VRegGapIndex, MRI, DummyRC);
-
- auto VRegRenameMap = GetVRegRenameMap(VRegs, renamedInOtherBB, MRI, DummyRC);
+ auto VRegRenameMap = GetVRegRenameMap(VRegs, renamedInOtherBB, MRI, NVC);
Changed |= doVRegRenaming(renamedInOtherBB, VRegRenameMap, MRI);
+
+ // Here we renumber the def vregs for the idempotent instructions from the top
+ // of the MachineBasicBlock so that they are named in the order that we sorted
+ // them alphabetically. Eventually we won't need SkipVRegs because we will use
+ // named vregs instead.
+ NVC.SkipVRegs();
+
+ auto MII = MBB->begin();
+ for (unsigned i = 0; i < IdempotentInstCount && MII != MBB->end(); ++i) {
+ MachineInstr &MI = *MII++;
+ Changed = true;
+ unsigned vRegToRename = MI.getOperand(0).getReg();
+ auto Rename = NVC.createVirtualRegister(MRI.getRegClass(vRegToRename));
+
+ std::vector<MachineOperand *> RenameMOs;
+ for (auto &MO : MRI.reg_operands(vRegToRename)) {
+ RenameMOs.push_back(&MO);
+ }
+
+ for (auto *MO : RenameMOs) {
+ MO->setReg(Rename);
+ }
+ }
+
Changed |= doDefKillClear(MBB);
- DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump(); dbgs() << "\n";);
- DEBUG(dbgs() << "\n\n================================================\n\n");
+ LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump();
+ dbgs() << "\n";);
+ LLVM_DEBUG(
+ dbgs() << "\n\n================================================\n\n");
return Changed;
}
@@ -592,22 +770,21 @@ bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
if (CanonicalizeFunctionNumber != ~0U) {
if (CanonicalizeFunctionNumber != functionNum++)
return false;
- DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName() << "\n";);
+ LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName()
+ << "\n";);
}
// We need a valid vreg to create a vreg type for skipping all those
// stray vreg numbers so that we reach alignment/canonical vreg values.
- std::vector<MachineBasicBlock*> RPOList = GetRPOList(MF);
+ std::vector<MachineBasicBlock *> RPOList = GetRPOList(MF);
- DEBUG(
- dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF.getName() << " \n\n";
- dbgs() << "\n\n================================================\n\n";
- dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n";
- for (auto MBB : RPOList) {
- dbgs() << MBB->getName() << "\n";
- }
- dbgs() << "\n\n================================================\n\n";
- );
+ LLVM_DEBUG(
+ dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF.getName() << " \n\n";
+ dbgs() << "\n\n================================================\n\n";
+ dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n";
+ for (auto MBB
+ : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs()
+ << "\n\n================================================\n\n";);
std::vector<StringRef> BBNames;
std::vector<unsigned> RenamedInOtherBB;
@@ -617,9 +794,11 @@ bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ NamedVRegCursor NVC(MRI);
for (auto MBB : RPOList)
- Changed |= runOnBasicBlock(MBB, BBNames, RenamedInOtherBB, BBNum, GapIdx);
+ Changed |=
+ runOnBasicBlock(MBB, BBNames, RenamedInOtherBB, BBNum, GapIdx, NVC);
return Changed;
}
-
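
As a rough usage sketch of the NamedVRegCursor introduced above (RC is an assumed TargetRegisterClass pointer; the concrete names and numbers are illustrative, though the counter behaviour follows the arithmetic in the constructor and SkipVRegs):

    NamedVRegCursor NVC(MRI);
    // Each call hands out a fresh vreg whose name encodes the running
    // counter, e.g. %namedVReg1001, %namedVReg1002, ...
    unsigned A = NVC.createVirtualRegister(RC);
    unsigned B = NVC.createVirtualRegister(RC);
    // SkipVRegs() advances the counter to a 2000-aligned boundary at least
    // 2000 ahead, so renamed ranges from different blocks do not collide.
    NVC.SkipVRegs();
    unsigned C = NVC.createVirtualRegister(RC);
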
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index 6adb7f1288d7..da05c9a22785 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -179,23 +179,6 @@ static Cursor lexName(Cursor C, MIToken &Token, MIToken::TokenKind Type,
return C;
}
-static Cursor maybeLexIntegerOrScalarType(Cursor C, MIToken &Token) {
- if ((C.peek() != 'i' && C.peek() != 's' && C.peek() != 'p') ||
- !isdigit(C.peek(1)))
- return None;
- char Kind = C.peek();
- auto Range = C;
- C.advance(); // Skip 'i', 's', or 'p'
- while (isdigit(C.peek()))
- C.advance();
-
- Token.reset(Kind == 'i'
- ? MIToken::IntegerType
- : (Kind == 's' ? MIToken::ScalarType : MIToken::PointerType),
- Range.upto(C));
- return C;
-}
-
static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
return StringSwitch<MIToken::TokenKind>(Identifier)
.Case("_", MIToken::underscore)
@@ -211,6 +194,14 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("renamable", MIToken::kw_renamable)
.Case("tied-def", MIToken::kw_tied_def)
.Case("frame-setup", MIToken::kw_frame_setup)
+ .Case("frame-destroy", MIToken::kw_frame_destroy)
+ .Case("nnan", MIToken::kw_nnan)
+ .Case("ninf", MIToken::kw_ninf)
+ .Case("nsz", MIToken::kw_nsz)
+ .Case("arcp", MIToken::kw_arcp)
+ .Case("contract", MIToken::kw_contract)
+ .Case("afn", MIToken::kw_afn)
+ .Case("reassoc", MIToken::kw_reassoc)
.Case("debug-location", MIToken::kw_debug_location)
.Case("same_value", MIToken::kw_cfi_same_value)
.Case("offset", MIToken::kw_cfi_offset)
@@ -241,6 +232,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("dereferenceable", MIToken::kw_dereferenceable)
.Case("invariant", MIToken::kw_invariant)
.Case("align", MIToken::kw_align)
+ .Case("addrspace", MIToken::kw_addrspace)
.Case("stack", MIToken::kw_stack)
.Case("got", MIToken::kw_got)
.Case("jump-table", MIToken::kw_jump_table)
@@ -408,17 +400,38 @@ static bool isRegisterChar(char C) {
return isIdentifierChar(C) && C != '.';
}
-static Cursor maybeLexRegister(Cursor C, MIToken &Token) {
- if (C.peek() != '%')
+static Cursor lexNamedVirtualRegister(Cursor C, MIToken &Token) {
+ Cursor Range = C;
+ C.advance(); // Skip '%'
+ while (isRegisterChar(C.peek()))
+ C.advance();
+ Token.reset(MIToken::NamedVirtualRegister, Range.upto(C))
+ .setStringValue(Range.upto(C).drop_front(1)); // Drop the '%'
+ return C;
+}
+
+static Cursor maybeLexRegister(Cursor C, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
+ if (C.peek() != '%' && C.peek() != '$')
+ return None;
+
+ if (C.peek() == '%') {
+ if (isdigit(C.peek(1)))
+ return lexVirtualRegister(C, Token);
+
+ if (isRegisterChar(C.peek(1)))
+ return lexNamedVirtualRegister(C, Token);
+
return None;
- if (isdigit(C.peek(1)))
- return lexVirtualRegister(C, Token);
+ }
+
+ assert(C.peek() == '$');
auto Range = C;
- C.advance(); // Skip '%'
+ C.advance(); // Skip '$'
while (isRegisterChar(C.peek()))
C.advance();
Token.reset(MIToken::NamedRegister, Range.upto(C))
- .setStringValue(Range.upto(C).drop_front(1)); // Drop the '%'
+ .setStringValue(Range.upto(C).drop_front(1)); // Drop the '$'
return C;
}
@@ -441,7 +454,7 @@ static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token,
static Cursor maybeLexExternalSymbol(Cursor C, MIToken &Token,
ErrorCallbackType ErrorCallback) {
- if (C.peek() != '$')
+ if (C.peek() != '&')
return None;
return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1,
ErrorCallback);
@@ -620,8 +633,6 @@ StringRef llvm::lexMIToken(StringRef Source, MIToken &Token,
return C.remaining();
}
- if (Cursor R = maybeLexIntegerOrScalarType(C, Token))
- return R.remaining();
if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback))
return R.remaining();
if (Cursor R = maybeLexIdentifier(C, Token))
@@ -640,7 +651,7 @@ StringRef llvm::lexMIToken(StringRef Source, MIToken &Token,
return R.remaining();
if (Cursor R = maybeLexIRValue(C, Token, ErrorCallback))
return R.remaining();
- if (Cursor R = maybeLexRegister(C, Token))
+ if (Cursor R = maybeLexRegister(C, Token, ErrorCallback))
return R.remaining();
if (Cursor R = maybeLexGlobalValue(C, Token, ErrorCallback))
return R.remaining();
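
In effect, the lexer now distinguishes register-like tokens purely by their leading character; the examples below are illustrative:

    %0, %42     numbered virtual registers (VirtualRegister)
    %sum        named virtual registers (NamedVirtualRegister)
    $eax, $sp   physical registers (NamedRegister; previously spelled with '%')
    &memcpy     external symbols (ExternalSymbol; previously spelled with '$')
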
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h
index 0204d549d5d4..e21c71532f79 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -63,6 +63,14 @@ struct MIToken {
kw_renamable,
kw_tied_def,
kw_frame_setup,
+ kw_frame_destroy,
+ kw_nnan,
+ kw_ninf,
+ kw_nsz,
+ kw_arcp,
+ kw_contract,
+ kw_afn,
+ kw_reassoc,
kw_debug_location,
kw_cfi_same_value,
kw_cfi_offset,
@@ -92,6 +100,7 @@ struct MIToken {
kw_non_temporal,
kw_invariant,
kw_align,
+ kw_addrspace,
kw_stack,
kw_got,
kw_jump_table,
@@ -114,12 +123,10 @@ struct MIToken {
// Identifier tokens
Identifier,
- IntegerType,
NamedRegister,
+ NamedVirtualRegister,
MachineBasicBlockLabel,
MachineBasicBlock,
- PointerType,
- ScalarType,
StackObject,
FixedStackObject,
NamedGlobalValue,
@@ -168,7 +175,7 @@ public:
bool isRegister() const {
return Kind == NamedRegister || Kind == underscore ||
- Kind == VirtualRegister;
+ Kind == NamedVirtualRegister || Kind == VirtualRegister;
}
bool isRegisterFlag() const {
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index 1a78ae3aad07..a61e7872f1ae 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -98,6 +98,18 @@ VRegInfo &PerFunctionMIParsingState::getVRegInfo(unsigned Num) {
return *I.first->second;
}
+VRegInfo &PerFunctionMIParsingState::getVRegInfoNamed(StringRef RegName) {
+ assert(RegName != "" && "Expected named reg.");
+
+ auto I = VRegInfosNamed.insert(std::make_pair(RegName.str(), nullptr));
+ if (I.second) {
+ VRegInfo *Info = new (Allocator) VRegInfo;
+ Info->VReg = MF.getRegInfo().createIncompleteVirtualRegister(RegName);
+ I.first->second = Info;
+ }
+ return *I.first->second;
+}
+
namespace {
/// A wrapper struct around the 'MachineOperand' struct that includes a source
@@ -182,6 +194,7 @@ public:
bool parseNamedRegister(unsigned &Reg);
bool parseVirtualRegister(VRegInfo *&Info);
+ bool parseNamedVirtualRegister(VRegInfo *&Info);
bool parseRegister(unsigned &Reg, VRegInfo *&VRegInfo);
bool parseRegisterFlag(unsigned &Flags);
bool parseRegisterClassOrBank(VRegInfo &RegInfo);
@@ -190,7 +203,7 @@ public:
bool parseRegisterOperand(MachineOperand &Dest,
Optional<unsigned> &TiedDefIdx, bool IsDef = false);
bool parseImmediateOperand(MachineOperand &Dest);
- bool parseIRConstant(StringRef::iterator Loc, StringRef Source,
+ bool parseIRConstant(StringRef::iterator Loc, StringRef StringValue,
const Constant *&C);
bool parseIRConstant(StringRef::iterator Loc, const Constant *&C);
bool parseLowLevelType(StringRef::iterator Loc, LLT &Ty);
@@ -209,7 +222,7 @@ public:
bool parseJumpTableIndexOperand(MachineOperand &Dest);
bool parseExternalSymbolOperand(MachineOperand &Dest);
bool parseMDNode(MDNode *&Node);
- bool parseDIExpression(MDNode *&Node);
+ bool parseDIExpression(MDNode *&Expr);
bool parseMetadataOperand(MachineOperand &Dest);
bool parseCFIOffset(int &Offset);
bool parseCFIRegister(unsigned &Reg);
@@ -228,6 +241,7 @@ public:
Optional<unsigned> &TiedDefIdx);
bool parseOffset(int64_t &Offset);
bool parseAlignment(unsigned &Alignment);
+ bool parseAddrspace(unsigned &Addrspace);
bool parseOperandsOffset(MachineOperand &Op);
bool parseIRValue(const Value *&V);
bool parseMemoryOperandFlag(MachineMemOperand::Flags &Flags);
@@ -915,15 +929,43 @@ bool MIParser::verifyImplicitOperands(ArrayRef<ParsedMachineOperand> Operands,
continue;
return error(Operands.empty() ? Token.location() : Operands.back().End,
Twine("missing implicit register operand '") +
- printImplicitRegisterFlag(I) + " %" +
+ printImplicitRegisterFlag(I) + " $" +
getRegisterName(TRI, I.getReg()) + "'");
}
return false;
}
bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
- if (Token.is(MIToken::kw_frame_setup)) {
- Flags |= MachineInstr::FrameSetup;
+ // Allow frame and fast math flags for OPCODE
+ while (Token.is(MIToken::kw_frame_setup) ||
+ Token.is(MIToken::kw_frame_destroy) ||
+ Token.is(MIToken::kw_nnan) ||
+ Token.is(MIToken::kw_ninf) ||
+ Token.is(MIToken::kw_nsz) ||
+ Token.is(MIToken::kw_arcp) ||
+ Token.is(MIToken::kw_contract) ||
+ Token.is(MIToken::kw_afn) ||
+ Token.is(MIToken::kw_reassoc)) {
+ // Mine frame and fast math flags
+ if (Token.is(MIToken::kw_frame_setup))
+ Flags |= MachineInstr::FrameSetup;
+ if (Token.is(MIToken::kw_frame_destroy))
+ Flags |= MachineInstr::FrameDestroy;
+ if (Token.is(MIToken::kw_nnan))
+ Flags |= MachineInstr::FmNoNans;
+ if (Token.is(MIToken::kw_ninf))
+ Flags |= MachineInstr::FmNoInfs;
+ if (Token.is(MIToken::kw_nsz))
+ Flags |= MachineInstr::FmNsz;
+ if (Token.is(MIToken::kw_arcp))
+ Flags |= MachineInstr::FmArcp;
+ if (Token.is(MIToken::kw_contract))
+ Flags |= MachineInstr::FmContract;
+ if (Token.is(MIToken::kw_afn))
+ Flags |= MachineInstr::FmAfn;
+ if (Token.is(MIToken::kw_reassoc))
+ Flags |= MachineInstr::FmReassoc;
+
lex();
}
if (Token.isNot(MIToken::Identifier))
@@ -943,7 +985,18 @@ bool MIParser::parseNamedRegister(unsigned &Reg) {
return false;
}
+bool MIParser::parseNamedVirtualRegister(VRegInfo *&Info) {
+ assert(Token.is(MIToken::NamedVirtualRegister) && "Expected NamedVReg token");
+ StringRef Name = Token.stringValue();
+ // TODO: Check that the VReg name is not the same as a physical register name.
+ // If it is, then print a warning (when warnings are implemented).
+ Info = &PFS.getVRegInfoNamed(Name);
+ return false;
+}
+
bool MIParser::parseVirtualRegister(VRegInfo *&Info) {
+ if (Token.is(MIToken::NamedVirtualRegister))
+ return parseNamedVirtualRegister(Info);
assert(Token.is(MIToken::VirtualRegister) && "Needs VirtualRegister token");
unsigned ID;
if (getUnsigned(ID))
@@ -959,6 +1012,7 @@ bool MIParser::parseRegister(unsigned &Reg, VRegInfo *&Info) {
return false;
case MIToken::NamedRegister:
return parseNamedRegister(Reg);
+ case MIToken::NamedVirtualRegister:
case MIToken::VirtualRegister:
if (parseVirtualRegister(Info))
return true;
@@ -1249,11 +1303,17 @@ bool MIParser::parseIRConstant(StringRef::iterator Loc, const Constant *&C) {
}
bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) {
- if (Token.is(MIToken::ScalarType)) {
+ if (Token.range().front() == 's' || Token.range().front() == 'p') {
+ StringRef SizeStr = Token.range().drop_front();
+ if (SizeStr.size() == 0 || !llvm::all_of(SizeStr, isdigit))
+ return error("expected integers after 's'/'p' type character");
+ }
+
+ if (Token.range().front() == 's') {
Ty = LLT::scalar(APSInt(Token.range().drop_front()).getZExtValue());
lex();
return false;
- } else if (Token.is(MIToken::PointerType)) {
+ } else if (Token.range().front() == 'p') {
const DataLayout &DL = MF.getDataLayout();
unsigned AS = APSInt(Token.range().drop_front()).getZExtValue();
Ty = LLT::pointer(AS, DL.getPointerSizeInBits(AS));
@@ -1264,38 +1324,60 @@ bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) {
// Now we're looking for a vector.
if (Token.isNot(MIToken::less))
return error(Loc,
- "expected unsized, pN, sN or <N x sM> for GlobalISel type");
-
+ "expected sN, pA, <M x sN>, or <M x pA> for GlobalISel type");
lex();
if (Token.isNot(MIToken::IntegerLiteral))
- return error(Loc, "expected <N x sM> for vctor type");
+ return error(Loc, "expected <M x sN> or <M x pA> for vector type");
uint64_t NumElements = Token.integerValue().getZExtValue();
lex();
if (Token.isNot(MIToken::Identifier) || Token.stringValue() != "x")
- return error(Loc, "expected '<N x sM>' for vector type");
+ return error(Loc, "expected <M x sN> or <M x pA> for vector type");
lex();
- if (Token.isNot(MIToken::ScalarType))
- return error(Loc, "expected '<N x sM>' for vector type");
- uint64_t ScalarSize = APSInt(Token.range().drop_front()).getZExtValue();
+ if (Token.range().front() != 's' && Token.range().front() != 'p')
+ return error(Loc, "expected <M x sN> or <M x pA> for vector type");
+ StringRef SizeStr = Token.range().drop_front();
+ if (SizeStr.size() == 0 || !llvm::all_of(SizeStr, isdigit))
+ return error("expected integers after 's'/'p' type character");
+
+ if (Token.range().front() == 's')
+ Ty = LLT::scalar(APSInt(Token.range().drop_front()).getZExtValue());
+ else if (Token.range().front() == 'p') {
+ const DataLayout &DL = MF.getDataLayout();
+ unsigned AS = APSInt(Token.range().drop_front()).getZExtValue();
+ Ty = LLT::pointer(AS, DL.getPointerSizeInBits(AS));
+ } else
+ return error(Loc, "expected <M x sN> or <M x pA> for vector type");
lex();
if (Token.isNot(MIToken::greater))
- return error(Loc, "expected '<N x sM>' for vector type");
+ return error(Loc, "expected <M x sN> or <M x pA> for vector type");
lex();
- Ty = LLT::vector(NumElements, ScalarSize);
+ Ty = LLT::vector(NumElements, Ty);
return false;
}
bool MIParser::parseTypedImmediateOperand(MachineOperand &Dest) {
- assert(Token.is(MIToken::IntegerType));
+ assert(Token.is(MIToken::Identifier));
+ StringRef TypeStr = Token.range();
+ if (TypeStr.front() != 'i' && TypeStr.front() != 's' &&
+ TypeStr.front() != 'p')
+ return error(
+ "a typed immediate operand should start with one of 'i', 's', or 'p'");
+ StringRef SizeStr = Token.range().drop_front();
+ if (SizeStr.size() == 0 || !llvm::all_of(SizeStr, isdigit))
+ return error("expected integers after 'i'/'s'/'p' type character");
+
auto Loc = Token.location();
lex();
- if (Token.isNot(MIToken::IntegerLiteral))
- return error("expected an integer literal");
+ if (Token.isNot(MIToken::IntegerLiteral)) {
+ if (Token.isNot(MIToken::Identifier) ||
+ !(Token.range() == "true" || Token.range() == "false"))
+ return error("expected an integer literal");
+ }
const Constant *C = nullptr;
if (parseIRConstant(Loc, C))
return true;
@@ -1876,13 +1958,11 @@ bool MIParser::parseTargetIndexOperand(MachineOperand &Dest) {
bool MIParser::parseCustomRegisterMaskOperand(MachineOperand &Dest) {
assert(Token.stringValue() == "CustomRegMask" && "Expected a custom RegMask");
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- assert(TRI && "Expected target register info");
lex();
if (expectAndConsume(MIToken::lparen))
return true;
- uint32_t *Mask = MF.allocateRegisterMask(TRI->getNumRegs());
+ uint32_t *Mask = MF.allocateRegMask();
while (true) {
if (Token.isNot(MIToken::NamedRegister))
return error("expected a named register");
@@ -1905,9 +1985,7 @@ bool MIParser::parseCustomRegisterMaskOperand(MachineOperand &Dest) {
bool MIParser::parseLiveoutRegisterMaskOperand(MachineOperand &Dest) {
assert(Token.is(MIToken::kw_liveout));
- const auto *TRI = MF.getSubtarget().getRegisterInfo();
- assert(TRI && "Expected target register info");
- uint32_t *Mask = MF.allocateRegisterMask(TRI->getNumRegs());
+ uint32_t *Mask = MF.allocateRegMask();
lex();
if (expectAndConsume(MIToken::lparen))
return true;
@@ -1946,11 +2024,10 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest,
case MIToken::underscore:
case MIToken::NamedRegister:
case MIToken::VirtualRegister:
+ case MIToken::NamedVirtualRegister:
return parseRegisterOperand(Dest, TiedDefIdx);
case MIToken::IntegerLiteral:
return parseImmediateOperand(Dest);
- case MIToken::IntegerType:
- return parseTypedImmediateOperand(Dest);
case MIToken::kw_half:
case MIToken::kw_float:
case MIToken::kw_double:
@@ -2011,8 +2088,10 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest,
Dest = MachineOperand::CreateRegMask(RegMask);
lex();
break;
- } else
+ } else if (Token.stringValue() == "CustomRegMask") {
return parseCustomRegisterMaskOperand(Dest);
+ } else
+ return parseTypedImmediateOperand(Dest);
default:
// FIXME: Parse the MCSymbol machine operand.
return error("expected a machine operand");
@@ -2091,6 +2170,17 @@ bool MIParser::parseAlignment(unsigned &Alignment) {
return false;
}
+bool MIParser::parseAddrspace(unsigned &Addrspace) {
+ assert(Token.is(MIToken::kw_addrspace));
+ lex();
+ if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned())
+ return error("expected an integer literal after 'addrspace'");
+ if (getUnsigned(Addrspace))
+ return true;
+ lex();
+ return false;
+}
+
bool MIParser::parseOperandsOffset(MachineOperand &Op) {
int64_t Offset = 0;
if (parseOffset(Offset))
@@ -2402,6 +2492,10 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
if (parseAlignment(BaseAlignment))
return true;
break;
+ case MIToken::kw_addrspace:
+ if (parseAddrspace(Ptr.AddrSpace))
+ return true;
+ break;
case MIToken::md_tbaa:
lex();
if (parseMDNode(AAInfo.TBAA))
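
Taken together, the MIParser changes above let MIR carry frame and fast-math flags on instructions and an address space on machine memory operands. Hypothetical lines exercising the new syntax (the opcodes, registers, and numbers are illustrative, not taken from this patch):

    %2 = nnan arcp contract FADDDrr %0, %1
    %3 = LDRWui %4, 0 :: (load 4 from %ir.ptr, addrspace 1)
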
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h
index 2307881068ef..b06ceb21b740 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h
@@ -56,6 +56,7 @@ struct PerFunctionMIParsingState {
DenseMap<unsigned, MachineBasicBlock *> MBBSlots;
DenseMap<unsigned, VRegInfo*> VRegInfos;
+ StringMap<VRegInfo*> VRegInfosNamed;
DenseMap<unsigned, int> FixedStackObjectSlots;
DenseMap<unsigned, int> StackObjectSlots;
DenseMap<unsigned, unsigned> ConstantPoolSlots;
@@ -66,7 +67,8 @@ struct PerFunctionMIParsingState {
const Name2RegClassMap &Names2RegClasses,
const Name2RegBankMap &Names2RegBanks);
- VRegInfo &getVRegInfo(unsigned VReg);
+ VRegInfo &getVRegInfo(unsigned Num);
+ VRegInfo &getVRegInfoNamed(StringRef RegName);
};
/// Parse the machine basic block definitions, and skip the machine
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index 7d8e62736a34..3d2db97acb48 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -122,8 +122,9 @@ public:
const yaml::StringValue &RegisterSource,
bool IsRestored, int FrameIdx);
+ template <typename T>
bool parseStackObjectsDebugInfo(PerFunctionMIParsingState &PFS,
- const yaml::MachineStackObject &Object,
+ const T &Object,
int FrameIdx);
bool initializeConstantPool(PerFunctionMIParsingState &PFS,
@@ -237,7 +238,7 @@ std::unique_ptr<Module> MIRParserImpl::parseIRModule() {
dyn_cast_or_null<yaml::BlockScalarNode>(In.getCurrentNode())) {
SMDiagnostic Error;
M = parseAssembly(MemoryBufferRef(BSN->getValue(), Filename), Error,
- Context, &IRSlots);
+ Context, &IRSlots, /*UpgradeDebugInfo=*/false);
if (!M) {
reportDiagnostic(diagFromBlockStringDiag(Error, BSN->getSourceRange()));
return nullptr;
@@ -362,6 +363,8 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
MachineFunctionProperties::Property::RegBankSelected);
if (YamlMF.Selected)
MF.getProperties().set(MachineFunctionProperties::Property::Selected);
+ if (YamlMF.FailedISel)
+ MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
PerFunctionMIParsingState PFS(MF, SM, IRSlots, Names2RegClasses,
Names2RegBanks);
@@ -417,6 +420,8 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
computeFunctionProperties(MF);
+ MF.getSubtarget().mirFileLoaded(MF);
+
MF.verify();
return false;
}
@@ -508,13 +513,12 @@ bool MIRParserImpl::setupRegisterInfo(const PerFunctionMIParsingState &PFS,
MachineRegisterInfo &MRI = MF.getRegInfo();
bool Error = false;
// Create VRegs
- for (auto P : PFS.VRegInfos) {
- const VRegInfo &Info = *P.second;
+ auto populateVRegInfo = [&] (const VRegInfo &Info, Twine Name) {
unsigned Reg = Info.VReg;
switch (Info.Kind) {
case VRegInfo::UNKNOWN:
error(Twine("Cannot determine class/bank of virtual register ") +
- Twine(P.first) + " in function '" + MF.getName() + "'");
+ Name + " in function '" + MF.getName() + "'");
Error = true;
break;
case VRegInfo::NORMAL:
@@ -528,6 +532,17 @@ bool MIRParserImpl::setupRegisterInfo(const PerFunctionMIParsingState &PFS,
MRI.setRegBank(Reg, *Info.D.RegBank);
break;
}
+ };
+
+ for (auto I = PFS.VRegInfosNamed.begin(), E = PFS.VRegInfosNamed.end();
+ I != E; I++) {
+ const VRegInfo &Info = *I->second;
+ populateVRegInfo(Info, Twine(I->first()));
+ }
+
+ for (auto P : PFS.VRegInfos) {
+ const VRegInfo &Info = *P.second;
+ populateVRegInfo(Info, Twine(P.first));
}
// Compute MachineRegisterInfo::UsedPhysRegMask
@@ -568,6 +583,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
MFI.setHasOpaqueSPAdjustment(YamlMFI.HasOpaqueSPAdjustment);
MFI.setHasVAStart(YamlMFI.HasVAStart);
MFI.setHasMustTailInVarArgFunc(YamlMFI.HasMustTailInVarArgFunc);
+ MFI.setLocalFrameSize(YamlMFI.LocalFrameSize);
if (!YamlMFI.SavePoint.Value.empty()) {
MachineBasicBlock *MBB = nullptr;
if (parseMBBReference(PFS, MBB, YamlMFI.SavePoint))
@@ -601,6 +617,8 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
if (parseCalleeSavedRegister(PFS, CSIInfo, Object.CalleeSavedRegister,
Object.CalleeSavedRestored, ObjectIdx))
return true;
+ if (parseStackObjectsDebugInfo(PFS, Object, ObjectIdx))
+ return true;
}
// Initialize the ordinary frame objects.
@@ -685,11 +703,11 @@ static bool typecheckMDNode(T *&Result, MDNode *Node,
return false;
}
+template <typename T>
bool MIRParserImpl::parseStackObjectsDebugInfo(PerFunctionMIParsingState &PFS,
- const yaml::MachineStackObject &Object, int FrameIdx) {
+ const T &Object, int FrameIdx) {
// Debug information can only be attached to stack objects; Fixed stack
// objects aren't supported.
- assert(FrameIdx >= 0 && "Expected a stack object frame index");
MDNode *Var = nullptr, *Expr = nullptr, *Loc = nullptr;
if (parseMDNode(PFS, Var, Object.DebugVar) ||
parseMDNode(PFS, Expr, Object.DebugExpr) ||
@@ -704,7 +722,7 @@ bool MIRParserImpl::parseStackObjectsDebugInfo(PerFunctionMIParsingState &PFS,
typecheckMDNode(DIExpr, Expr, Object.DebugExpr, "DIExpression", *this) ||
typecheckMDNode(DILoc, Loc, Object.DebugLoc, "DILocation", *this))
return true;
- PFS.MF.setVariableDbgInfo(DIVar, DIExpr, unsigned(FrameIdx), DILoc);
+ PFS.MF.setVariableDbgInfo(DIVar, DIExpr, FrameIdx, DILoc);
return false;
}
diff --git a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp
index f91cca6e4e50..bf8cd1489ec5 100644
--- a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -19,7 +19,6 @@
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
@@ -157,14 +156,10 @@ public:
void print(const MachineBasicBlock &MBB);
void print(const MachineInstr &MI);
- void printIRValueReference(const Value &V);
void printStackObjectReference(int FrameIndex);
void print(const MachineInstr &MI, unsigned OpIdx,
const TargetRegisterInfo *TRI, bool ShouldPrintRegisterTies,
LLT TypeToPrint, bool PrintDef = true);
- void print(const LLVMContext &Context, const TargetInstrInfo &TII,
- const MachineMemOperand &Op);
- void printSyncScope(const LLVMContext &Context, SyncScope::ID SSID);
};
} // end namespace llvm
@@ -207,6 +202,8 @@ void MIRPrinter::print(const MachineFunction &MF) {
MachineFunctionProperties::Property::RegBankSelected);
YamlMF.Selected = MF.getProperties().hasProperty(
MachineFunctionProperties::Property::Selected);
+ YamlMF.FailedISel = MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel);
convert(YamlMF, MF.getRegInfo(), MF.getSubtarget().getRegisterInfo());
ModuleSlotTracker MST(MF.getFunction().getParent());
@@ -259,6 +256,21 @@ static void printRegClassOrBank(unsigned Reg, yaml::StringValue &Dest,
OS << printRegClassOrBank(Reg, RegInfo, TRI);
}
+template <typename T>
+static void
+printStackObjectDbgInfo(const MachineFunction::VariableDbgInfo &DebugVar,
+ T &Object, ModuleSlotTracker &MST) {
+ std::array<std::string *, 3> Outputs{{&Object.DebugVar.Value,
+ &Object.DebugExpr.Value,
+ &Object.DebugLoc.Value}};
+ std::array<const Metadata *, 3> Metas{{DebugVar.Var,
+ DebugVar.Expr,
+ DebugVar.Loc}};
+ for (unsigned i = 0; i < 3; ++i) {
+ raw_string_ostream StrOS(*Outputs[i]);
+ Metas[i]->printAsOperand(StrOS, MST);
+ }
+}
void MIRPrinter::convert(yaml::MachineFunction &MF,
const MachineRegisterInfo &RegInfo,
@@ -270,6 +282,8 @@ void MIRPrinter::convert(yaml::MachineFunction &MF,
unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
yaml::VirtualRegisterDefinition VReg;
VReg.ID = I;
+ if (RegInfo.getVRegName(Reg) != "")
+ continue;
::printRegClassOrBank(Reg, VReg.Class, RegInfo, TRI);
unsigned PreferredReg = RegInfo.getSimpleHint(Reg);
if (PreferredReg)
@@ -316,6 +330,7 @@ void MIRPrinter::convert(ModuleSlotTracker &MST,
YamlMFI.HasOpaqueSPAdjustment = MFI.hasOpaqueSPAdjustment();
YamlMFI.HasVAStart = MFI.hasVAStart();
YamlMFI.HasMustTailInVarArgFunc = MFI.hasMustTailInVarArgFunc();
+ YamlMFI.LocalFrameSize = MFI.getLocalFrameSize();
if (MFI.getSavePoint()) {
raw_string_ostream StrOS(YamlMFI.SavePoint.Value);
StrOS << printMBBReference(*MFI.getSavePoint());
@@ -421,19 +436,12 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
assert(StackObjectInfo != StackObjectOperandMapping.end() &&
"Invalid stack object index");
const FrameIndexOperand &StackObject = StackObjectInfo->second;
- assert(!StackObject.IsFixed && "Expected a non-fixed stack object");
- auto &Object = YMF.StackObjects[StackObject.ID];
- {
- raw_string_ostream StrOS(Object.DebugVar.Value);
- DebugVar.Var->printAsOperand(StrOS, MST);
- }
- {
- raw_string_ostream StrOS(Object.DebugExpr.Value);
- DebugVar.Expr->printAsOperand(StrOS, MST);
- }
- {
- raw_string_ostream StrOS(Object.DebugLoc.Value);
- DebugVar.Loc->printAsOperand(StrOS, MST);
+ if (StackObject.IsFixed) {
+ auto &Object = YMF.FixedStackObjects[StackObject.ID];
+ printStackObjectDbgInfo(DebugVar, Object, MST);
+ } else {
+ auto &Object = YMF.StackObjects[StackObject.ID];
+ printStackObjectDbgInfo(DebugVar, Object, MST);
}
}
}
@@ -670,6 +678,23 @@ void MIPrinter::print(const MachineInstr &MI) {
OS << " = ";
if (MI.getFlag(MachineInstr::FrameSetup))
OS << "frame-setup ";
+ if (MI.getFlag(MachineInstr::FrameDestroy))
+ OS << "frame-destroy ";
+ if (MI.getFlag(MachineInstr::FmNoNans))
+ OS << "nnan ";
+ if (MI.getFlag(MachineInstr::FmNoInfs))
+ OS << "ninf ";
+ if (MI.getFlag(MachineInstr::FmNsz))
+ OS << "nsz ";
+ if (MI.getFlag(MachineInstr::FmArcp))
+ OS << "arcp ";
+ if (MI.getFlag(MachineInstr::FmContract))
+ OS << "contract ";
+ if (MI.getFlag(MachineInstr::FmAfn))
+ OS << "afn ";
+ if (MI.getFlag(MachineInstr::FmReassoc))
+ OS << "reassoc ";
+
OS << TII->getName(MI.getOpcode());
if (I < E)
OS << ' ';
@@ -683,46 +708,27 @@ void MIPrinter::print(const MachineInstr &MI) {
NeedComma = true;
}
- if (MI.getDebugLoc()) {
+ if (const DebugLoc &DL = MI.getDebugLoc()) {
if (NeedComma)
OS << ',';
OS << " debug-location ";
- MI.getDebugLoc()->printAsOperand(OS, MST);
+ DL->printAsOperand(OS, MST);
}
if (!MI.memoperands_empty()) {
OS << " :: ";
const LLVMContext &Context = MF->getFunction().getContext();
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
bool NeedComma = false;
for (const auto *Op : MI.memoperands()) {
if (NeedComma)
OS << ", ";
- print(Context, *TII, *Op);
+ Op->print(OS, MST, SSNs, Context, &MFI, TII);
NeedComma = true;
}
}
}
-void MIPrinter::printIRValueReference(const Value &V) {
- if (isa<GlobalValue>(V)) {
- V.printAsOperand(OS, /*PrintType=*/false, MST);
- return;
- }
- if (isa<Constant>(V)) {
- // Machine memory operands can load/store to/from constant value pointers.
- OS << '`';
- V.printAsOperand(OS, /*PrintType=*/true, MST);
- OS << '`';
- return;
- }
- OS << "%ir.";
- if (V.hasName()) {
- printLLVMNameWithoutPrefix(OS, V.getName());
- return;
- }
- MachineOperand::printIRSlotNumber(OS, MST.getLocalSlot(&V));
-}
-
void MIPrinter::printStackObjectReference(int FrameIndex) {
auto ObjectInfo = StackObjectOperandMapping.find(FrameIndex);
assert(ObjectInfo != StackObjectOperandMapping.end() &&
@@ -741,7 +747,7 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx,
case MachineOperand::MO_Immediate:
if (MI.isOperandSubregIdx(OpIdx)) {
MachineOperand::printTargetFlags(OS, Op);
- MachineOperand::printSubregIdx(OS, Op.getImm(), TRI);
+ MachineOperand::printSubRegIdx(OS, Op.getImm(), TRI);
break;
}
LLVM_FALLTHROUGH;
@@ -765,8 +771,8 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx,
if (ShouldPrintRegisterTies && Op.isReg() && Op.isTied() && !Op.isDef())
TiedOperandIdx = Op.getParent()->findTiedOperandIdx(OpIdx);
const TargetIntrinsicInfo *TII = MI.getMF()->getTarget().getIntrinsicInfo();
- Op.print(OS, MST, TypeToPrint, PrintDef, ShouldPrintRegisterTies,
- TiedOperandIdx, TRI, TII);
+ Op.print(OS, MST, TypeToPrint, PrintDef, /*IsStandalone=*/false,
+ ShouldPrintRegisterTies, TiedOperandIdx, TRI, TII);
break;
}
case MachineOperand::MO_FrameIndex:
@@ -783,132 +789,6 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx,
}
}
-static const char *getTargetMMOFlagName(const TargetInstrInfo &TII,
- unsigned TMMOFlag) {
- auto Flags = TII.getSerializableMachineMemOperandTargetFlags();
- for (const auto &I : Flags) {
- if (I.first == TMMOFlag) {
- return I.second;
- }
- }
- return nullptr;
-}
-
-void MIPrinter::print(const LLVMContext &Context, const TargetInstrInfo &TII,
- const MachineMemOperand &Op) {
- OS << '(';
- if (Op.isVolatile())
- OS << "volatile ";
- if (Op.isNonTemporal())
- OS << "non-temporal ";
- if (Op.isDereferenceable())
- OS << "dereferenceable ";
- if (Op.isInvariant())
- OS << "invariant ";
- if (Op.getFlags() & MachineMemOperand::MOTargetFlag1)
- OS << '"' << getTargetMMOFlagName(TII, MachineMemOperand::MOTargetFlag1)
- << "\" ";
- if (Op.getFlags() & MachineMemOperand::MOTargetFlag2)
- OS << '"' << getTargetMMOFlagName(TII, MachineMemOperand::MOTargetFlag2)
- << "\" ";
- if (Op.getFlags() & MachineMemOperand::MOTargetFlag3)
- OS << '"' << getTargetMMOFlagName(TII, MachineMemOperand::MOTargetFlag3)
- << "\" ";
-
- assert((Op.isLoad() || Op.isStore()) && "machine memory operand must be a load or store (or both)");
- if (Op.isLoad())
- OS << "load ";
- if (Op.isStore())
- OS << "store ";
-
- printSyncScope(Context, Op.getSyncScopeID());
-
- if (Op.getOrdering() != AtomicOrdering::NotAtomic)
- OS << toIRString(Op.getOrdering()) << ' ';
- if (Op.getFailureOrdering() != AtomicOrdering::NotAtomic)
- OS << toIRString(Op.getFailureOrdering()) << ' ';
-
- OS << Op.getSize();
- if (const Value *Val = Op.getValue()) {
- OS << ((Op.isLoad() && Op.isStore()) ? " on "
- : Op.isLoad() ? " from " : " into ");
- printIRValueReference(*Val);
- } else if (const PseudoSourceValue *PVal = Op.getPseudoValue()) {
- OS << ((Op.isLoad() && Op.isStore()) ? " on "
- : Op.isLoad() ? " from " : " into ");
- assert(PVal && "Expected a pseudo source value");
- switch (PVal->kind()) {
- case PseudoSourceValue::Stack:
- OS << "stack";
- break;
- case PseudoSourceValue::GOT:
- OS << "got";
- break;
- case PseudoSourceValue::JumpTable:
- OS << "jump-table";
- break;
- case PseudoSourceValue::ConstantPool:
- OS << "constant-pool";
- break;
- case PseudoSourceValue::FixedStack:
- printStackObjectReference(
- cast<FixedStackPseudoSourceValue>(PVal)->getFrameIndex());
- break;
- case PseudoSourceValue::GlobalValueCallEntry:
- OS << "call-entry ";
- cast<GlobalValuePseudoSourceValue>(PVal)->getValue()->printAsOperand(
- OS, /*PrintType=*/false, MST);
- break;
- case PseudoSourceValue::ExternalSymbolCallEntry:
- OS << "call-entry $";
- printLLVMNameWithoutPrefix(
- OS, cast<ExternalSymbolPseudoSourceValue>(PVal)->getSymbol());
- break;
- case PseudoSourceValue::TargetCustom:
- llvm_unreachable("TargetCustom pseudo source values are not supported");
- break;
- }
- }
- MachineOperand::printOperandOffset(OS, Op.getOffset());
- if (Op.getBaseAlignment() != Op.getSize())
- OS << ", align " << Op.getBaseAlignment();
- auto AAInfo = Op.getAAInfo();
- if (AAInfo.TBAA) {
- OS << ", !tbaa ";
- AAInfo.TBAA->printAsOperand(OS, MST);
- }
- if (AAInfo.Scope) {
- OS << ", !alias.scope ";
- AAInfo.Scope->printAsOperand(OS, MST);
- }
- if (AAInfo.NoAlias) {
- OS << ", !noalias ";
- AAInfo.NoAlias->printAsOperand(OS, MST);
- }
- if (Op.getRanges()) {
- OS << ", !range ";
- Op.getRanges()->printAsOperand(OS, MST);
- }
- OS << ')';
-}
-
-void MIPrinter::printSyncScope(const LLVMContext &Context, SyncScope::ID SSID) {
- switch (SSID) {
- case SyncScope::System: {
- break;
- }
- default: {
- if (SSNs.empty())
- Context.getSyncScopeNames(SSNs);
-
- OS << "syncscope(\"";
- PrintEscapedString(SSNs[SSID], OS);
- OS << "\") ";
- break;
- }
- }
-}
-
void llvm::printMIR(raw_ostream &OS, const Module &M) {
yaml::Output Out(OS);
Out << const_cast<Module &>(M);
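
The MachineBasicBlock printing rework in the next file emits blocks in MIR-like form instead of the old "Live Ins:" / "Predecessors according to CFG:" comments. A hypothetical rendering (block names, registers, and probabilities are illustrative):

    bb.2.if.then (address-taken, landing-pad):
    ; predecessors: %bb.0, %bb.1
      successors: %bb.3(0x80000000)
      liveins: $edi, $r14
      $eax = MOV32ri 7
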
diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
index cd67449e3acf..38e8369dc739 100644
--- a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
@@ -173,7 +174,7 @@ MachineBasicBlock::SkipPHIsLabelsAndDebug(MachineBasicBlock::iterator I) {
const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
iterator E = end();
- while (I != E && (I->isPHI() || I->isPosition() || I->isDebugValue() ||
+ while (I != E && (I->isPHI() || I->isPosition() || I->isDebugInstr() ||
TII->isBasicBlockPrologue(*I)))
++I;
// FIXME: This needs to change if we wish to bundle labels / dbg_values
@@ -186,7 +187,7 @@ MachineBasicBlock::SkipPHIsLabelsAndDebug(MachineBasicBlock::iterator I) {
MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() {
iterator B = begin(), E = end(), I = E;
- while (I != B && ((--I)->isTerminator() || I->isDebugValue()))
+ while (I != B && ((--I)->isTerminator() || I->isDebugInstr()))
; /*noop */
while (I != E && !I->isTerminator())
++I;
@@ -195,7 +196,7 @@ MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() {
MachineBasicBlock::instr_iterator MachineBasicBlock::getFirstInstrTerminator() {
instr_iterator B = instr_begin(), E = instr_end(), I = E;
- while (I != B && ((--I)->isTerminator() || I->isDebugValue()))
+ while (I != B && ((--I)->isTerminator() || I->isDebugInstr()))
; /*noop */
while (I != E && !I->isTerminator())
++I;
@@ -213,7 +214,7 @@ MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() {
while (I != B) {
--I;
// Return instruction that starts a bundle.
- if (I->isDebugValue() || I->isInsideBundle())
+ if (I->isDebugInstr() || I->isInsideBundle())
continue;
return I;
}
@@ -259,8 +260,8 @@ std::string MachineBasicBlock::getFullName() const {
return Name;
}
-void MachineBasicBlock::print(raw_ostream &OS, const SlotIndexes *Indexes)
- const {
+void MachineBasicBlock::print(raw_ostream &OS, const SlotIndexes *Indexes,
+ bool IsStandalone) const {
const MachineFunction *MF = getParent();
if (!MF) {
OS << "Can't print out MachineBasicBlock because parent MachineFunction"
@@ -270,11 +271,13 @@ void MachineBasicBlock::print(raw_ostream &OS, const SlotIndexes *Indexes)
const Function &F = MF->getFunction();
const Module *M = F.getParent();
ModuleSlotTracker MST(M);
- print(OS, MST, Indexes);
+ MST.incorporateFunction(F);
+ print(OS, MST, Indexes, IsStandalone);
}
void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
- const SlotIndexes *Indexes) const {
+ const SlotIndexes *Indexes,
+ bool IsStandalone) const {
const MachineFunction *MF = getParent();
if (!MF) {
OS << "Can't print out MachineBasicBlock because parent MachineFunction"
@@ -285,70 +288,143 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (Indexes)
OS << Indexes->getMBBStartIdx(this) << '\t';
- OS << printMBBReference(*this) << ": ";
-
- const char *Comma = "";
- if (const BasicBlock *LBB = getBasicBlock()) {
- OS << Comma << "derived from LLVM BB ";
- LBB->printAsOperand(OS, /*PrintType=*/false, MST);
- Comma = ", ";
+ OS << "bb." << getNumber();
+ bool HasAttributes = false;
+ if (const auto *BB = getBasicBlock()) {
+ if (BB->hasName()) {
+ OS << "." << BB->getName();
+ } else {
+ HasAttributes = true;
+ OS << " (";
+ int Slot = MST.getLocalSlot(BB);
+ if (Slot == -1)
+ OS << "<ir-block badref>";
+ else
+ OS << (Twine("%ir-block.") + Twine(Slot)).str();
+ }
}
- if (isEHPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; }
- if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; }
- if (Alignment)
- OS << Comma << "Align " << Alignment << " (" << (1u << Alignment)
- << " bytes)";
- OS << '\n';
+ if (hasAddressTaken()) {
+ OS << (HasAttributes ? ", " : " (");
+ OS << "address-taken";
+ HasAttributes = true;
+ }
+ if (isEHPad()) {
+ OS << (HasAttributes ? ", " : " (");
+ OS << "landing-pad";
+ HasAttributes = true;
+ }
+ if (getAlignment()) {
+ OS << (HasAttributes ? ", " : " (");
+ OS << "align " << getAlignment();
+ HasAttributes = true;
+ }
+ if (HasAttributes)
+ OS << ")";
+ OS << ":\n";
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
- if (!livein_empty()) {
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetInstrInfo &TII = *getParent()->getSubtarget().getInstrInfo();
+ bool HasLineAttributes = false;
+
+ // Print the preds of this block according to the CFG.
+ if (!pred_empty() && IsStandalone) {
if (Indexes) OS << '\t';
- OS << " Live Ins:";
- for (const auto &LI : LiveIns) {
- OS << ' ' << printReg(LI.PhysReg, TRI);
- if (!LI.LaneMask.all())
- OS << ':' << PrintLaneMask(LI.LaneMask);
+ // Don't indent(2), align with previous line attributes.
+ OS << "; predecessors: ";
+ for (auto I = pred_begin(), E = pred_end(); I != E; ++I) {
+ if (I != pred_begin())
+ OS << ", ";
+ OS << printMBBReference(**I);
}
OS << '\n';
+ HasLineAttributes = true;
}
- // Print the preds of this block according to the CFG.
- if (!pred_empty()) {
+
+ if (!succ_empty()) {
if (Indexes) OS << '\t';
- OS << " Predecessors according to CFG:";
- for (const_pred_iterator PI = pred_begin(), E = pred_end(); PI != E; ++PI)
- OS << " " << printMBBReference(*(*PI));
+ // Print the successors
+ OS.indent(2) << "successors: ";
+ for (auto I = succ_begin(), E = succ_end(); I != E; ++I) {
+ if (I != succ_begin())
+ OS << ", ";
+ OS << printMBBReference(**I);
+ if (!Probs.empty())
+ OS << '('
+ << format("0x%08" PRIx32, getSuccProbability(I).getNumerator())
+ << ')';
+ }
+ if (!Probs.empty() && IsStandalone) {
+ // Print human readable probabilities as comments.
+ OS << "; ";
+ for (auto I = succ_begin(), E = succ_end(); I != E; ++I) {
+ const BranchProbability &BP = *getProbabilityIterator(I);
+ if (I != succ_begin())
+ OS << ", ";
+ OS << printMBBReference(**I) << '('
+ << format("%.2f%%",
+ rint(((double)BP.getNumerator() / BP.getDenominator()) *
+ 100.0 * 100.0) /
+ 100.0)
+ << ')';
+ }
+ }
+
OS << '\n';
+ HasLineAttributes = true;
}
- for (auto &I : instrs()) {
+ if (!livein_empty() && MRI.tracksLiveness()) {
+ if (Indexes) OS << '\t';
+ OS.indent(2) << "liveins: ";
+
+ bool First = true;
+ for (const auto &LI : liveins()) {
+ if (!First)
+ OS << ", ";
+ First = false;
+ OS << printReg(LI.PhysReg, TRI);
+ if (!LI.LaneMask.all())
+ OS << ":0x" << PrintLaneMask(LI.LaneMask);
+ }
+ HasLineAttributes = true;
+ }
+
+ if (HasLineAttributes)
+ OS << '\n';
+
+ bool IsInBundle = false;
+ for (const MachineInstr &MI : instrs()) {
if (Indexes) {
- if (Indexes->hasIndex(I))
- OS << Indexes->getInstructionIndex(I);
+ if (Indexes->hasIndex(MI))
+ OS << Indexes->getInstructionIndex(MI);
OS << '\t';
}
- OS << '\t';
- if (I.isInsideBundle())
- OS << " * ";
- I.print(OS, MST);
- }
- // Print the successors of this block according to the CFG.
- if (!succ_empty()) {
- if (Indexes) OS << '\t';
- OS << " Successors according to CFG:";
- for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) {
- OS << " " << printMBBReference(*(*SI));
- if (!Probs.empty())
- OS << '(' << *getProbabilityIterator(SI) << ')';
+ if (IsInBundle && !MI.isInsideBundle()) {
+ OS.indent(2) << "}\n";
+ IsInBundle = false;
+ }
+
+ OS.indent(IsInBundle ? 4 : 2);
+ MI.print(OS, MST, IsStandalone, /*SkipOpers=*/false, /*SkipDebugLoc=*/false,
+ /*AddNewLine=*/false, &TII);
+
+ if (!IsInBundle && MI.getFlag(MachineInstr::BundledSucc)) {
+ OS << " {";
+ IsInBundle = true;
}
OS << '\n';
}
- if (IrrLoopHeaderWeight) {
+
+ if (IsInBundle)
+ OS.indent(2) << "}\n";
+
+ if (IrrLoopHeaderWeight && IsStandalone) {
if (Indexes) OS << '\t';
- OS << " Irreducible loop header weight: "
- << IrrLoopHeaderWeight.getValue();
- OS << '\n';
+ OS.indent(2) << "; Irreducible loop header weight: "
+ << IrrLoopHeaderWeight.getValue() << '\n';
}
}
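
For reference, a minimal standalone sketch (plain C++, not part of this patch) of the two-decimal rounding used for the human-readable successor probabilities printed above; the numerator/denominator values below are made up:

#include <cmath>
#include <cstdint>
#include <cstdio>

// Standalone sketch, not LLVM code: round a numerator/denominator branch
// probability to two decimal places, matching the "%.2f%%" comment output.
static double toPercent(uint32_t Numerator, uint32_t Denominator) {
  return std::rint((double)Numerator / Denominator * 100.0 * 100.0) / 100.0;
}

int main() {
  // 0x2aaaaaab / 0x80000000 is roughly one third of the full scale.
  std::printf("%.2f%%\n", toPercent(0x2aaaaaabu, 0x80000000u)); // 33.33%
}
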
@@ -382,10 +458,10 @@ bool MachineBasicBlock::isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) const {
}
void MachineBasicBlock::sortUniqueLiveIns() {
- std::sort(LiveIns.begin(), LiveIns.end(),
- [](const RegisterMaskPair &LI0, const RegisterMaskPair &LI1) {
- return LI0.PhysReg < LI1.PhysReg;
- });
+ llvm::sort(LiveIns.begin(), LiveIns.end(),
+ [](const RegisterMaskPair &LI0, const RegisterMaskPair &LI1) {
+ return LI0.PhysReg < LI1.PhysReg;
+ });
  // Liveins are sorted by physreg; now we can merge their lanemasks.
LiveInVector::const_iterator I = LiveIns.begin();
LiveInVector::const_iterator J;
@@ -583,6 +659,25 @@ void MachineBasicBlock::addSuccessorWithoutProb(MachineBasicBlock *Succ) {
Succ->addPredecessor(this);
}
+void MachineBasicBlock::splitSuccessor(MachineBasicBlock *Old,
+ MachineBasicBlock *New,
+ bool NormalizeSuccProbs) {
+ succ_iterator OldI = llvm::find(successors(), Old);
+ assert(OldI != succ_end() && "Old is not a successor of this block!");
+ assert(llvm::find(successors(), New) == succ_end() &&
+ "New is already a successor of this block!");
+
+ // Add a new successor with equal probability as the original one. Note
+ // that we directly copy the probability using the iterator rather than
+ // getting a potentially synthetic probability computed when unknown. This
+ // preserves the probabilities as-is and then we can renormalize them and
+ // query them effectively afterward.
+ addSuccessor(New, Probs.empty() ? BranchProbability::getUnknown()
+ : *getProbabilityIterator(OldI));
+ if (NormalizeSuccProbs)
+ normalizeSuccProbs();
+}
+
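
A minimal standalone sketch of the copy-then-renormalize behaviour described in the comment above, assuming plain integer weights in place of LLVM's BranchProbability type:

#include <cstdint>
#include <cstdio>
#include <numeric>
#include <vector>

// Standalone sketch, not LLVM's API: give the new successor the same stored
// weight as the one being split, then renormalize so the weights again sum
// (up to integer rounding) to the full scale.
static void splitAndRenormalize(std::vector<uint64_t> &Weights, size_t OldIdx,
                                uint64_t Scale) {
  Weights.push_back(Weights[OldIdx]); // copy the stored value as-is
  uint64_t Sum = std::accumulate(Weights.begin(), Weights.end(), uint64_t(0));
  for (uint64_t &W : Weights)         // renormalize to the full scale
    W = W * Scale / Sum;
}

int main() {
  std::vector<uint64_t> Weights = {60, 40}; // two successors, scale 100
  splitAndRenormalize(Weights, 1, 100);     // split the 40% successor
  for (uint64_t W : Weights)
    std::printf("%llu ", (unsigned long long)W); // prints 42 28 28
}
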
void MachineBasicBlock::removeSuccessor(MachineBasicBlock *Succ,
bool NormalizeSuccProbs) {
succ_iterator I = find(Successors, Succ);
@@ -779,9 +874,9 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
MF->insert(std::next(MachineFunction::iterator(this)), NMBB);
- DEBUG(dbgs() << "Splitting critical edge: " << printMBBReference(*this)
- << " -- " << printMBBReference(*NMBB) << " -- "
- << printMBBReference(*Succ) << '\n');
+ LLVM_DEBUG(dbgs() << "Splitting critical edge: " << printMBBReference(*this)
+ << " -- " << printMBBReference(*NMBB) << " -- "
+ << printMBBReference(*Succ) << '\n');
LiveIntervals *LIS = P.getAnalysisIfAvailable<LiveIntervals>();
SlotIndexes *Indexes = P.getAnalysisIfAvailable<SlotIndexes>();
@@ -810,7 +905,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
LV->getVarInfo(Reg).removeKill(*MI)) {
KilledRegs.push_back(Reg);
- DEBUG(dbgs() << "Removing terminator kill: " << *MI);
+ LLVM_DEBUG(dbgs() << "Removing terminator kill: " << *MI);
OI->setIsKill(false);
}
}
@@ -901,7 +996,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
continue;
if (TargetRegisterInfo::isVirtualRegister(Reg))
LV->getVarInfo(Reg).Kills.push_back(&*I);
- DEBUG(dbgs() << "Restored terminator kill: " << *I);
+ LLVM_DEBUG(dbgs() << "Restored terminator kill: " << *I);
break;
}
}
@@ -1034,8 +1129,8 @@ bool MachineBasicBlock::canSplitCriticalEdge(
// case that we can't handle. Since this never happens in properly optimized
// code, just skip those edges.
if (TBB && TBB == FBB) {
- DEBUG(dbgs() << "Won't split critical edge after degenerate "
- << printMBBReference(*this) << '\n');
+ LLVM_DEBUG(dbgs() << "Won't split critical edge after degenerate "
+ << printMBBReference(*this) << '\n');
return false;
}
return true;
@@ -1189,6 +1284,16 @@ MachineBasicBlock::findDebugLoc(instr_iterator MBBI) {
return {};
}
+/// Find the previous valid DebugLoc preceding MBBI, skipping any DBG_VALUE
+/// instructions. Return UnknownLoc if there is none.
+DebugLoc MachineBasicBlock::findPrevDebugLoc(instr_iterator MBBI) {
+ if (MBBI == instr_begin()) return {};
+ // Skip debug declarations, we don't want a DebugLoc from them.
+ MBBI = skipDebugInstructionsBackward(std::prev(MBBI), instr_begin());
+ if (!MBBI->isDebugInstr()) return MBBI->getDebugLoc();
+ return {};
+}
+
/// Find and return the merged DebugLoc of the branch instructions of the block.
/// Return UnknownLoc if there is none.
DebugLoc
diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 167135b56ec0..21350df624e7 100644
--- a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -198,10 +198,10 @@ namespace {
class BlockChain;
-/// \brief Type for our function-wide basic block -> block chain mapping.
+/// Type for our function-wide basic block -> block chain mapping.
using BlockToChainMapType = DenseMap<const MachineBasicBlock *, BlockChain *>;
-/// \brief A chain of blocks which will be laid out contiguously.
+/// A chain of blocks which will be laid out contiguously.
///
/// This is the datastructure representing a chain of consecutive blocks that
/// are profitable to layout together in order to maximize fallthrough
@@ -213,13 +213,13 @@ using BlockToChainMapType = DenseMap<const MachineBasicBlock *, BlockChain *>;
/// them. They participate in a block-to-chain mapping, which is updated
/// automatically as chains are merged together.
class BlockChain {
- /// \brief The sequence of blocks belonging to this chain.
+ /// The sequence of blocks belonging to this chain.
///
/// This is the sequence of blocks for a particular chain. These will be laid
/// out in-order within the function.
SmallVector<MachineBasicBlock *, 4> Blocks;
- /// \brief A handle to the function-wide basic block to block chain mapping.
+ /// A handle to the function-wide basic block to block chain mapping.
///
/// This is retained in each block chain to simplify the computation of child
/// block chains for SCC-formation and iteration. We store the edges to child
@@ -228,7 +228,7 @@ class BlockChain {
BlockToChainMapType &BlockToChain;
public:
- /// \brief Construct a new BlockChain.
+ /// Construct a new BlockChain.
///
/// This builds a new block chain representing a single basic block in the
/// function. It also registers itself as the chain that block participates
@@ -239,15 +239,15 @@ public:
BlockToChain[BB] = this;
}
- /// \brief Iterator over blocks within the chain.
+ /// Iterator over blocks within the chain.
using iterator = SmallVectorImpl<MachineBasicBlock *>::iterator;
using const_iterator = SmallVectorImpl<MachineBasicBlock *>::const_iterator;
- /// \brief Beginning of blocks within the chain.
+ /// Beginning of blocks within the chain.
iterator begin() { return Blocks.begin(); }
const_iterator begin() const { return Blocks.begin(); }
- /// \brief End of blocks within the chain.
+ /// End of blocks within the chain.
iterator end() { return Blocks.end(); }
const_iterator end() const { return Blocks.end(); }
@@ -261,7 +261,7 @@ public:
return false;
}
- /// \brief Merge a block chain into this one.
+ /// Merge a block chain into this one.
///
/// This routine merges a block chain into this one. It takes care of forming
/// a contiguous sequence of basic blocks, updating the edge list, and
@@ -293,14 +293,14 @@ public:
}
#ifndef NDEBUG
- /// \brief Dump the blocks in this chain.
+ /// Dump the blocks in this chain.
LLVM_DUMP_METHOD void dump() {
for (MachineBasicBlock *MBB : *this)
MBB->dump();
}
#endif // NDEBUG
- /// \brief Count of predecessors of any block within the chain which have not
+ /// Count of predecessors of any block within the chain which have not
/// yet been scheduled. In general, we will delay scheduling this chain
/// until those predecessors are scheduled (or we find a sufficiently good
/// reason to override this heuristic.) Note that when forming loop chains,
@@ -313,7 +313,7 @@ public:
};
class MachineBlockPlacement : public MachineFunctionPass {
- /// \brief A type for a block filter set.
+ /// A type for a block filter set.
using BlockFilterSet = SmallSetVector<const MachineBasicBlock *, 16>;
  /// Pair struct containing basic block and taildup profitability
@@ -329,47 +329,47 @@ class MachineBlockPlacement : public MachineFunctionPass {
MachineBasicBlock *Dest;
};
- /// \brief work lists of blocks that are ready to be laid out
+ /// work lists of blocks that are ready to be laid out
SmallVector<MachineBasicBlock *, 16> BlockWorkList;
SmallVector<MachineBasicBlock *, 16> EHPadWorkList;
/// Edges that have already been computed as optimal.
DenseMap<const MachineBasicBlock *, BlockAndTailDupResult> ComputedEdges;
- /// \brief Machine Function
+ /// Machine Function
MachineFunction *F;
- /// \brief A handle to the branch probability pass.
+ /// A handle to the branch probability pass.
const MachineBranchProbabilityInfo *MBPI;
- /// \brief A handle to the function-wide block frequency pass.
+ /// A handle to the function-wide block frequency pass.
std::unique_ptr<BranchFolder::MBFIWrapper> MBFI;
- /// \brief A handle to the loop info.
+ /// A handle to the loop info.
MachineLoopInfo *MLI;
- /// \brief Preferred loop exit.
+ /// Preferred loop exit.
/// Member variable for convenience. It may be removed by duplication deep
/// in the call stack.
MachineBasicBlock *PreferredLoopExit;
- /// \brief A handle to the target's instruction info.
+ /// A handle to the target's instruction info.
const TargetInstrInfo *TII;
- /// \brief A handle to the target's lowering info.
+ /// A handle to the target's lowering info.
const TargetLoweringBase *TLI;
- /// \brief A handle to the post dominator tree.
+ /// A handle to the post dominator tree.
MachinePostDominatorTree *MPDT;
- /// \brief Duplicator used to duplicate tails during placement.
+ /// Duplicator used to duplicate tails during placement.
///
/// Placement decisions can open up new tail duplication opportunities, but
/// since tail duplication affects placement decisions of later blocks, it
/// must be done inline.
TailDuplicator TailDup;
- /// \brief Allocator and owner of BlockChain structures.
+ /// Allocator and owner of BlockChain structures.
///
/// We build BlockChains lazily while processing the loop structure of
/// a function. To reduce malloc traffic, we allocate them using this
@@ -378,7 +378,7 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// the chains.
SpecificBumpPtrAllocator<BlockChain> ChainAllocator;
- /// \brief Function wide BasicBlock to BlockChain mapping.
+ /// Function wide BasicBlock to BlockChain mapping.
///
/// This mapping allows efficiently moving from any given basic block to the
/// BlockChain it participates in, if any. We use it to, among other things,
@@ -425,7 +425,7 @@ class MachineBlockPlacement : public MachineFunctionPass {
MachineBasicBlock *BB, MachineBasicBlock *LPred,
BlockChain &Chain, BlockFilterSet *BlockFilter,
MachineFunction::iterator &PrevUnplacedBlockIt,
- bool &DuplicatedToPred);
+ bool &DuplicatedToLPred);
bool hasBetterLayoutPredecessor(
const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
const BlockChain &SuccChain, BranchProbability SuccProb,
@@ -441,7 +441,7 @@ class MachineBlockPlacement : public MachineFunctionPass {
MachineFunction::iterator &PrevUnplacedBlockIt,
const BlockFilterSet *BlockFilter);
- /// \brief Add a basic block to the work list if it is appropriate.
+ /// Add a basic block to the work list if it is appropriate.
///
/// If the optional parameter BlockFilter is provided, only MBB
/// present in the set will be added to the worklist. If nullptr
@@ -474,7 +474,7 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// fallthroughs.
bool isProfitableToTailDup(
const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
- BranchProbability AdjustedSumProb,
+ BranchProbability QProb,
const BlockChain &Chain, const BlockFilterSet *BlockFilter);
/// Check for a trellis layout.
@@ -545,7 +545,7 @@ INITIALIZE_PASS_END(MachineBlockPlacement, DEBUG_TYPE,
"Branch Probability Basic Block Placement", false, false)
#ifndef NDEBUG
-/// \brief Helper to print the name of a MBB.
+/// Helper to print the name of a MBB.
///
/// Only used by debug logging.
static std::string getBlockName(const MachineBasicBlock *BB) {
@@ -558,7 +558,7 @@ static std::string getBlockName(const MachineBasicBlock *BB) {
}
#endif
-/// \brief Mark a chain's successors as having one fewer preds.
+/// Mark a chain's successors as having one fewer preds.
///
/// When a chain is being merged into the "placed" chain, this routine will
/// quickly walk the successors of each block in the chain and mark them as
@@ -574,7 +574,7 @@ void MachineBlockPlacement::markChainSuccessors(
}
}
-/// \brief Mark a single block's successors as having one fewer preds.
+/// Mark a single block's successors as having one fewer preds.
///
/// Under normal circumstances, this is only called by markChainSuccessors,
/// but if a block that was to be placed is completely tail-duplicated away,
@@ -643,7 +643,8 @@ BranchProbability MachineBlockPlacement::collectViableSuccessors(
if (SuccChain == &Chain) {
SkipSucc = true;
} else if (Succ != *SuccChain->begin()) {
- DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Mid chain!\n");
+ LLVM_DEBUG(dbgs() << " " << getBlockName(Succ)
+ << " -> Mid chain!\n");
continue;
}
}
@@ -1010,7 +1011,7 @@ MachineBlockPlacement::getBestTrellisSuccessor(
// If we have a trellis, and BB doesn't have the best fallthrough edges,
// we shouldn't choose any successor. We've already looked and there's a
// better fallthrough edge for all the successors.
- DEBUG(dbgs() << "Trellis, but not one of the chosen edges.\n");
+ LLVM_DEBUG(dbgs() << "Trellis, but not one of the chosen edges.\n");
return Result;
}
@@ -1027,10 +1028,11 @@ MachineBlockPlacement::getBestTrellisSuccessor(
canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) &&
isProfitableToTailDup(BB, Succ2, MBPI->getEdgeProbability(BB, Succ1),
Chain, BlockFilter)) {
- DEBUG(BranchProbability Succ2Prob = getAdjustedProbability(
- MBPI->getEdgeProbability(BB, Succ2), AdjustedSumProb);
- dbgs() << " Selected: " << getBlockName(Succ2)
- << ", probability: " << Succ2Prob << " (Tail Duplicate)\n");
+ LLVM_DEBUG(BranchProbability Succ2Prob = getAdjustedProbability(
+ MBPI->getEdgeProbability(BB, Succ2), AdjustedSumProb);
+ dbgs() << " Selected: " << getBlockName(Succ2)
+ << ", probability: " << Succ2Prob
+ << " (Tail Duplicate)\n");
Result.BB = Succ2;
Result.ShouldTailDup = true;
return Result;
@@ -1041,10 +1043,10 @@ MachineBlockPlacement::getBestTrellisSuccessor(
ComputedEdges[BestB.Src] = { BestB.Dest, false };
auto TrellisSucc = BestA.Dest;
- DEBUG(BranchProbability SuccProb = getAdjustedProbability(
- MBPI->getEdgeProbability(BB, TrellisSucc), AdjustedSumProb);
- dbgs() << " Selected: " << getBlockName(TrellisSucc)
- << ", probability: " << SuccProb << " (Trellis)\n");
+ LLVM_DEBUG(BranchProbability SuccProb = getAdjustedProbability(
+ MBPI->getEdgeProbability(BB, TrellisSucc), AdjustedSumProb);
+ dbgs() << " Selected: " << getBlockName(TrellisSucc)
+ << ", probability: " << SuccProb << " (Trellis)\n");
Result.BB = TrellisSucc;
return Result;
}
@@ -1150,7 +1152,7 @@ void MachineBlockPlacement::precomputeTriangleChains() {
if (TriangleChainCount == 0)
return;
- DEBUG(dbgs() << "Pre-computing triangle chains.\n");
+ LLVM_DEBUG(dbgs() << "Pre-computing triangle chains.\n");
// Map from last block to the chain that contains it. This allows us to extend
// chains as we find new triangles.
DenseMap<const MachineBasicBlock *, TriangleChain> TriangleChainMap;
@@ -1224,8 +1226,9 @@ void MachineBlockPlacement::precomputeTriangleChains() {
MachineBasicBlock *dst = Chain.Edges.back();
Chain.Edges.pop_back();
for (MachineBasicBlock *src : reverse(Chain.Edges)) {
- DEBUG(dbgs() << "Marking edge: " << getBlockName(src) << "->" <<
- getBlockName(dst) << " as pre-computed based on triangles.\n");
+ LLVM_DEBUG(dbgs() << "Marking edge: " << getBlockName(src) << "->"
+ << getBlockName(dst)
+ << " as pre-computed based on triangles.\n");
auto InsertResult = ComputedEdges.insert({src, {dst, true}});
assert(InsertResult.second && "Block seen twice.");
@@ -1431,15 +1434,15 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
}
if (BadCFGConflict) {
- DEBUG(dbgs() << " Not a candidate: " << getBlockName(Succ) << " -> " << SuccProb
- << " (prob) (non-cold CFG conflict)\n");
+ LLVM_DEBUG(dbgs() << " Not a candidate: " << getBlockName(Succ) << " -> "
+ << SuccProb << " (prob) (non-cold CFG conflict)\n");
return true;
}
return false;
}
-/// \brief Select the best successor for a block.
+/// Select the best successor for a block.
///
/// This looks across all successors of a particular block and attempts to
/// select the "best" one to be the layout successor. It only considers direct
@@ -1462,7 +1465,8 @@ MachineBlockPlacement::selectBestSuccessor(
auto AdjustedSumProb =
collectViableSuccessors(BB, Chain, BlockFilter, Successors);
- DEBUG(dbgs() << "Selecting best successor for: " << getBlockName(BB) << "\n");
+ LLVM_DEBUG(dbgs() << "Selecting best successor for: " << getBlockName(BB)
+ << "\n");
// if we already precomputed the best successor for BB, return that if still
// applicable.
@@ -1503,18 +1507,18 @@ MachineBlockPlacement::selectBestSuccessor(
continue;
}
- DEBUG(
- dbgs() << " Candidate: " << getBlockName(Succ) << ", probability: "
- << SuccProb
+ LLVM_DEBUG(
+ dbgs() << " Candidate: " << getBlockName(Succ)
+ << ", probability: " << SuccProb
<< (SuccChain.UnscheduledPredecessors != 0 ? " (CFG break)" : "")
<< "\n");
if (BestSucc.BB && BestProb >= SuccProb) {
- DEBUG(dbgs() << " Not the best candidate, continuing\n");
+ LLVM_DEBUG(dbgs() << " Not the best candidate, continuing\n");
continue;
}
- DEBUG(dbgs() << " Setting it as best candidate\n");
+ LLVM_DEBUG(dbgs() << " Setting it as best candidate\n");
BestSucc.BB = Succ;
BestProb = SuccProb;
}
@@ -1539,10 +1543,9 @@ MachineBlockPlacement::selectBestSuccessor(
break;
if (canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter)
&& (isProfitableToTailDup(BB, Succ, BestProb, Chain, BlockFilter))) {
- DEBUG(
- dbgs() << " Candidate: " << getBlockName(Succ) << ", probability: "
- << DupProb
- << " (Tail Duplicate)\n");
+ LLVM_DEBUG(dbgs() << " Candidate: " << getBlockName(Succ)
+ << ", probability: " << DupProb
+ << " (Tail Duplicate)\n");
BestSucc.BB = Succ;
BestSucc.ShouldTailDup = true;
break;
@@ -1550,12 +1553,12 @@ MachineBlockPlacement::selectBestSuccessor(
}
if (BestSucc.BB)
- DEBUG(dbgs() << " Selected: " << getBlockName(BestSucc.BB) << "\n");
+ LLVM_DEBUG(dbgs() << " Selected: " << getBlockName(BestSucc.BB) << "\n");
return BestSucc;
}
-/// \brief Select the best block from a worklist.
+/// Select the best block from a worklist.
///
/// This looks through the provided worklist as a list of candidate basic
/// blocks and selects the most profitable one to place. The definition of
@@ -1596,8 +1599,8 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
"Found CFG-violating block");
BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB);
- DEBUG(dbgs() << " " << getBlockName(MBB) << " -> ";
- MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n");
+ LLVM_DEBUG(dbgs() << " " << getBlockName(MBB) << " -> ";
+ MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n");
// For ehpad, we layout the least probable first as to avoid jumping back
// from least probable landingpads to more probable ones.
@@ -1627,7 +1630,7 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
return BestBlock;
}
-/// \brief Retrieve the first unplaced basic block.
+/// Retrieve the first unplaced basic block.
///
/// This routine is called when we are unable to use the CFG to walk through
/// all of the basic blocks and form a chain due to unnatural loops in the CFG.
@@ -1723,8 +1726,8 @@ void MachineBlockPlacement::buildChain(
if (!BestSucc)
break;
- DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the "
- "layout successor until the CFG reduces\n");
+ LLVM_DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the "
+ "layout successor until the CFG reduces\n");
}
// Placement may have changed tail duplication opportunities.
@@ -1743,18 +1746,18 @@ void MachineBlockPlacement::buildChain(
// Zero out UnscheduledPredecessors for the successor we're about to merge in case
// we selected a successor that didn't fit naturally into the CFG.
SuccChain.UnscheduledPredecessors = 0;
- DEBUG(dbgs() << "Merging from " << getBlockName(BB) << " to "
- << getBlockName(BestSucc) << "\n");
+ LLVM_DEBUG(dbgs() << "Merging from " << getBlockName(BB) << " to "
+ << getBlockName(BestSucc) << "\n");
markChainSuccessors(SuccChain, LoopHeaderBB, BlockFilter);
Chain.merge(BestSucc, &SuccChain);
BB = *std::prev(Chain.end());
}
- DEBUG(dbgs() << "Finished forming chain for header block "
- << getBlockName(*Chain.begin()) << "\n");
+ LLVM_DEBUG(dbgs() << "Finished forming chain for header block "
+ << getBlockName(*Chain.begin()) << "\n");
}
-/// \brief Find the best loop top block for layout.
+/// Find the best loop top block for layout.
///
/// Look for a block which is strictly better than the loop header for laying
/// out at the top of the loop. This looks for one and only one pattern:
@@ -1784,17 +1787,17 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
if (!LoopBlockSet.count(*HeaderChain.begin()))
return L.getHeader();
- DEBUG(dbgs() << "Finding best loop top for: " << getBlockName(L.getHeader())
- << "\n");
+ LLVM_DEBUG(dbgs() << "Finding best loop top for: "
+ << getBlockName(L.getHeader()) << "\n");
BlockFrequency BestPredFreq;
MachineBasicBlock *BestPred = nullptr;
for (MachineBasicBlock *Pred : L.getHeader()->predecessors()) {
if (!LoopBlockSet.count(Pred))
continue;
- DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", has "
- << Pred->succ_size() << " successors, ";
- MBFI->printBlockFreq(dbgs(), Pred) << " freq\n");
+ LLVM_DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", has "
+ << Pred->succ_size() << " successors, ";
+ MBFI->printBlockFreq(dbgs(), Pred) << " freq\n");
if (Pred->succ_size() > 1)
continue;
@@ -1809,7 +1812,7 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
// If no direct predecessor is fine, just use the loop header.
if (!BestPred) {
- DEBUG(dbgs() << " final top unchanged\n");
+ LLVM_DEBUG(dbgs() << " final top unchanged\n");
return L.getHeader();
}
@@ -1819,11 +1822,11 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
*BestPred->pred_begin() != L.getHeader())
BestPred = *BestPred->pred_begin();
- DEBUG(dbgs() << " final top: " << getBlockName(BestPred) << "\n");
+ LLVM_DEBUG(dbgs() << " final top: " << getBlockName(BestPred) << "\n");
return BestPred;
}
-/// \brief Find the best loop exiting block for layout.
+/// Find the best loop exiting block for layout.
///
/// This routine implements the logic to analyze the loop looking for the best
/// block to layout at the top of the loop. Typically this is done to maximize
@@ -1851,8 +1854,8 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
// blocks where rotating to exit with that block will reach an outer loop.
SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop;
- DEBUG(dbgs() << "Finding best loop exit for: " << getBlockName(L.getHeader())
- << "\n");
+ LLVM_DEBUG(dbgs() << "Finding best loop exit for: "
+ << getBlockName(L.getHeader()) << "\n");
for (MachineBasicBlock *MBB : L.getBlocks()) {
BlockChain &Chain = *BlockToChain[MBB];
// Ensure that this block is at the end of a chain; otherwise it could be
@@ -1875,15 +1878,15 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
BlockChain &SuccChain = *BlockToChain[Succ];
// Don't split chains, either this chain or the successor's chain.
if (&Chain == &SuccChain) {
- DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> "
- << getBlockName(Succ) << " (chain conflict)\n");
+ LLVM_DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> "
+ << getBlockName(Succ) << " (chain conflict)\n");
continue;
}
auto SuccProb = MBPI->getEdgeProbability(MBB, Succ);
if (LoopBlockSet.count(Succ)) {
- DEBUG(dbgs() << " looping: " << getBlockName(MBB) << " -> "
- << getBlockName(Succ) << " (" << SuccProb << ")\n");
+ LLVM_DEBUG(dbgs() << " looping: " << getBlockName(MBB) << " -> "
+ << getBlockName(Succ) << " (" << SuccProb << ")\n");
HasLoopingSucc = true;
continue;
}
@@ -1896,9 +1899,10 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
}
BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb;
- DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> "
- << getBlockName(Succ) << " [L:" << SuccLoopDepth << "] (";
- MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n");
+ LLVM_DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> "
+ << getBlockName(Succ) << " [L:" << SuccLoopDepth
+ << "] (";
+ MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n");
// Note that we bias this toward an existing layout successor to retain
// incoming order in the absence of better information. The exit must have
// a frequency higher than the current exit before we consider breaking
@@ -1922,11 +1926,12 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
// Without a candidate exiting block or with only a single block in the
// loop, just use the loop header to layout the loop.
if (!ExitingBB) {
- DEBUG(dbgs() << " No other candidate exit blocks, using loop header\n");
+ LLVM_DEBUG(
+ dbgs() << " No other candidate exit blocks, using loop header\n");
return nullptr;
}
if (L.getNumBlocks() == 1) {
- DEBUG(dbgs() << " Loop has 1 block, using loop header as exit\n");
+ LLVM_DEBUG(dbgs() << " Loop has 1 block, using loop header as exit\n");
return nullptr;
}
@@ -1937,11 +1942,12 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
!BlocksExitingToOuterLoop.count(ExitingBB))
return nullptr;
- DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB) << "\n");
+ LLVM_DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB)
+ << "\n");
return ExitingBB;
}
-/// \brief Attempt to rotate an exiting block to the bottom of the loop.
+/// Attempt to rotate an exiting block to the bottom of the loop.
///
/// Once we have built a chain, try to rotate it to line up the hot exit block
/// with fallthrough out of the loop if doing so doesn't introduce unnecessary
@@ -2014,12 +2020,12 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
return;
}
- DEBUG(dbgs() << "Rotating loop to put exit " << getBlockName(ExitingBB)
- << " at bottom\n");
+ LLVM_DEBUG(dbgs() << "Rotating loop to put exit " << getBlockName(ExitingBB)
+ << " at bottom\n");
std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());
}
-/// \brief Attempt to rotate a loop based on profile data to reduce branch cost.
+/// Attempt to rotate a loop based on profile data to reduce branch cost.
///
/// With profile data, we can determine the cost in terms of missed fall through
/// opportunities when rotating a loop chain and select the best rotation.
@@ -2150,8 +2156,9 @@ void MachineBlockPlacement::rotateLoopWithProfile(
}
}
- DEBUG(dbgs() << "The cost of loop rotation by making " << getBlockName(*Iter)
- << " to the top: " << Cost.getFrequency() << "\n");
+ LLVM_DEBUG(dbgs() << "The cost of loop rotation by making "
+ << getBlockName(*Iter)
+ << " to the top: " << Cost.getFrequency() << "\n");
if (Cost < SmallestRotationCost) {
SmallestRotationCost = Cost;
@@ -2160,13 +2167,13 @@ void MachineBlockPlacement::rotateLoopWithProfile(
}
if (RotationPos != LoopChain.end()) {
- DEBUG(dbgs() << "Rotate loop by making " << getBlockName(*RotationPos)
- << " to the top\n");
+ LLVM_DEBUG(dbgs() << "Rotate loop by making " << getBlockName(*RotationPos)
+ << " to the top\n");
std::rotate(LoopChain.begin(), RotationPos, LoopChain.end());
}
}
-/// \brief Collect blocks in the given loop that are to be placed.
+/// Collect blocks in the given loop that are to be placed.
///
/// When profile data is available, exclude cold blocks from the returned set;
/// otherwise, collect all blocks in the loop.
@@ -2202,7 +2209,7 @@ MachineBlockPlacement::collectLoopBlockSet(const MachineLoop &L) {
return LoopBlockSet;
}
-/// \brief Forms basic block chains from the natural loop structures.
+/// Forms basic block chains from the natural loop structures.
///
/// These chains are designed to preserve the existing *structure* of the code
/// as much as possible. We can then stitch the chains together in a way which
@@ -2265,7 +2272,7 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
else
rotateLoop(LoopChain, PreferredLoopExit, LoopBlockSet);
- DEBUG({
+ LLVM_DEBUG({
// Crash at the end so we get all of the debugging output first.
bool BadLoop = false;
if (LoopChain.UnscheduledPredecessors) {
@@ -2324,9 +2331,9 @@ void MachineBlockPlacement::buildCFGChains() {
// Ensure that the layout successor is a viable block, as we know that
// fallthrough is a possibility.
assert(NextFI != FE && "Can't fallthrough past the last block.");
- DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: "
- << getBlockName(BB) << " -> " << getBlockName(NextBB)
- << "\n");
+ LLVM_DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: "
+ << getBlockName(BB) << " -> " << getBlockName(NextBB)
+ << "\n");
Chain->merge(NextBB, nullptr);
#ifndef NDEBUG
BlocksWithUnanalyzableExits.insert(&*BB);
@@ -2356,7 +2363,7 @@ void MachineBlockPlacement::buildCFGChains() {
#ifndef NDEBUG
using FunctionBlockSetType = SmallPtrSet<MachineBasicBlock *, 16>;
#endif
- DEBUG({
+ LLVM_DEBUG({
// Crash at the end so we get all of the debugging output first.
bool BadFunc = false;
FunctionBlockSetType FunctionBlockSet;
@@ -2381,11 +2388,11 @@ void MachineBlockPlacement::buildCFGChains() {
// Splice the blocks into place.
MachineFunction::iterator InsertPos = F->begin();
- DEBUG(dbgs() << "[MBP] Function: "<< F->getName() << "\n");
+ LLVM_DEBUG(dbgs() << "[MBP] Function: " << F->getName() << "\n");
for (MachineBasicBlock *ChainBB : FunctionChain) {
- DEBUG(dbgs() << (ChainBB == *FunctionChain.begin() ? "Placing chain "
- : " ... ")
- << getBlockName(ChainBB) << "\n");
+ LLVM_DEBUG(dbgs() << (ChainBB == *FunctionChain.begin() ? "Placing chain "
+ : " ... ")
+ << getBlockName(ChainBB) << "\n");
if (InsertPos != MachineFunction::iterator(ChainBB))
F->splice(InsertPos, ChainBB);
else
@@ -2470,11 +2477,11 @@ void MachineBlockPlacement::optimizeBranches() {
MBPI->getEdgeProbability(ChainBB, FBB) >
MBPI->getEdgeProbability(ChainBB, TBB) &&
!TII->reverseBranchCondition(Cond)) {
- DEBUG(dbgs() << "Reverse order of the two branches: "
- << getBlockName(ChainBB) << "\n");
- DEBUG(dbgs() << " Edge probability: "
- << MBPI->getEdgeProbability(ChainBB, FBB) << " vs "
- << MBPI->getEdgeProbability(ChainBB, TBB) << "\n");
+ LLVM_DEBUG(dbgs() << "Reverse order of the two branches: "
+ << getBlockName(ChainBB) << "\n");
+ LLVM_DEBUG(dbgs() << " Edge probability: "
+ << MBPI->getEdgeProbability(ChainBB, FBB) << " vs "
+ << MBPI->getEdgeProbability(ChainBB, TBB) << "\n");
DebugLoc dl; // FIXME: this is nowhere
TII->removeBranch(*ChainBB);
TII->insertBranch(*ChainBB, FBB, TBB, Cond, dl);
@@ -2638,8 +2645,8 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
if (!shouldTailDuplicate(BB))
return false;
- DEBUG(dbgs() << "Redoing tail duplication for Succ#"
- << BB->getNumber() << "\n");
+ LLVM_DEBUG(dbgs() << "Redoing tail duplication for Succ#" << BB->getNumber()
+ << "\n");
// This has to be a callback because none of it can be done after
// BB is deleted.
@@ -2687,8 +2694,8 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
if (RemBB == PreferredLoopExit)
PreferredLoopExit = nullptr;
- DEBUG(dbgs() << "TailDuplicator deleted block: "
- << getBlockName(RemBB) << "\n");
+ LLVM_DEBUG(dbgs() << "TailDuplicator deleted block: "
+ << getBlockName(RemBB) << "\n");
};
auto RemovalCallbackRef =
function_ref<void(MachineBasicBlock*)>(RemovalCallback);
@@ -2752,7 +2759,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
TailDupSize = TailDupPlacementAggressiveThreshold;
TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
- // For agressive optimization, we can adjust some thresholds to be less
+ // For aggressive optimization, we can adjust some thresholds to be less
// conservative.
if (PassConfig->getOptLevel() >= CodeGenOpt::Aggressive) {
// At O3 we should be more willing to copy blocks for tail duplication. This
@@ -2834,17 +2841,17 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
namespace {
-/// \brief A pass to compute block placement statistics.
+/// A pass to compute block placement statistics.
///
/// A separate pass to compute interesting statistics for evaluating block
/// placement. This is separate from the actual placement pass so that they can
/// be computed in the absence of any placement transformations or when using
/// alternative placement strategies.
class MachineBlockPlacementStats : public MachineFunctionPass {
- /// \brief A handle to the branch probability pass.
+ /// A handle to the branch probability pass.
const MachineBranchProbabilityInfo *MBPI;
- /// \brief A handle to the function-wide block frequency pass.
+ /// A handle to the function-wide block frequency pass.
const MachineBlockFrequencyInfo *MBFI;
public:
diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
index 53c0d840ac84..6c92b1d426d6 100644
--- a/contrib/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
@@ -176,11 +176,10 @@ bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
// class given a super-reg class and subreg index.
if (DefMI->getOperand(1).getSubReg())
continue;
- const TargetRegisterClass *RC = MRI->getRegClass(Reg);
- if (!MRI->constrainRegClass(SrcReg, RC))
+ if (!MRI->constrainRegAttrs(SrcReg, Reg))
continue;
- DEBUG(dbgs() << "Coalescing: " << *DefMI);
- DEBUG(dbgs() << "*** to: " << *MI);
+ LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
+ LLVM_DEBUG(dbgs() << "*** to: " << *MI);
// Propagate SrcReg of copies to MI.
MO.setReg(SrcReg);
MRI->clearKillFlags(SrcReg);
@@ -315,7 +314,7 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
unsigned LookAheadLeft = LookAheadLimit;
while (LookAheadLeft) {
// Skip over dbg_value's.
- while (I != E && I != EE && I->isDebugValue())
+ while (I != E && I != EE && I->isDebugInstr())
++I;
if (I == EE) {
@@ -354,7 +353,7 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
bool MachineCSE::isCSECandidate(MachineInstr *MI) {
if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
- MI->isInlineAsm() || MI->isDebugValue())
+ MI->isInlineAsm() || MI->isDebugInstr())
return false;
// Ignore copies.
@@ -446,25 +445,23 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
// Heuristics #3: If the common subexpression is used by PHIs, do not reuse
// it unless the defined value is already used in the BB of the new use.
bool HasPHI = false;
- SmallPtrSet<MachineBasicBlock*, 4> CSBBs;
- for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
- HasPHI |= MI.isPHI();
- CSBBs.insert(MI.getParent());
+ for (MachineInstr &UseMI : MRI->use_nodbg_instructions(CSReg)) {
+ HasPHI |= UseMI.isPHI();
+ if (UseMI.getParent() == MI->getParent())
+ return true;
}
- if (!HasPHI)
- return true;
- return CSBBs.count(MI->getParent());
+ return !HasPHI;
}
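
A standalone sketch of the rewritten heuristic, using a hypothetical Use record in place of LLVM's register-use iterators:

#include <vector>

// Standalone sketch: CSE is profitable as soon as any non-debug use of the
// common subexpression already lives in the candidate's block; otherwise it
// is allowed only when no PHI uses the value.
struct Use { bool IsPHI; int ParentBlock; };

static bool profitableToCSE(const std::vector<Use> &Uses, int CandidateBlock) {
  bool HasPHI = false;
  for (const Use &U : Uses) {
    HasPHI |= U.IsPHI;
    if (U.ParentBlock == CandidateBlock)
      return true;
  }
  return !HasPHI;
}

int main() {
  // A PHI use elsewhere plus a plain use in block 2: still profitable there.
  return profitableToCSE({{true, 1}, {false, 2}}, 2) ? 0 : 1;
}
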
void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
- DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
+ LLVM_DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
ScopeType *Scope = new ScopeType(VNT);
ScopeMap[MBB] = Scope;
}
void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
- DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
+ LLVM_DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
assert(SI != ScopeMap.end());
delete SI->second;
@@ -548,13 +545,12 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// Found a common subexpression, eliminate it.
unsigned CSVN = VNT.lookup(MI);
MachineInstr *CSMI = Exps[CSVN];
- DEBUG(dbgs() << "Examining: " << *MI);
- DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
+ LLVM_DEBUG(dbgs() << "Examining: " << *MI);
+ LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
// Check if it's profitable to perform this CSE.
bool DoCSE = true;
- unsigned NumDefs = MI->getDesc().getNumDefs() +
- MI->getDesc().getNumImplicitDefs();
+ unsigned NumDefs = MI->getNumDefs();
for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
@@ -583,16 +579,17 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
"Do not CSE physical register defs!");
if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
- DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
+ LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
DoCSE = false;
break;
}
- // Don't perform CSE if the result of the old instruction cannot exist
- // within the register class of the new instruction.
- const TargetRegisterClass *OldRC = MRI->getRegClass(OldReg);
- if (!MRI->constrainRegClass(NewReg, OldRC)) {
- DEBUG(dbgs() << "*** Not the same register class, avoid CSE!\n");
+ // Don't perform CSE if the result of the new instruction cannot exist
+ // within the constraints (register class, bank, or low-level type) of
+ // the old instruction.
+ if (!MRI->constrainRegAttrs(NewReg, OldReg)) {
+ LLVM_DEBUG(
+ dbgs() << "*** Not the same register constraints, avoid CSE!\n");
DoCSE = false;
break;
}
diff --git a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp
index 702d21228477..0c6efff7bb40 100644
--- a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -39,8 +39,27 @@ inc_threshold("machine-combiner-inc-threshold", cl::Hidden,
cl::desc("Incremental depth computation will be used for basic "
"blocks with more instructions."), cl::init(500));
+static cl::opt<bool> dump_intrs("machine-combiner-dump-subst-intrs", cl::Hidden,
+ cl::desc("Dump all substituted intrs"),
+ cl::init(false));
+
+#ifdef EXPENSIVE_CHECKS
+static cl::opt<bool> VerifyPatternOrder(
+ "machine-combiner-verify-pattern-order", cl::Hidden,
+ cl::desc(
+ "Verify that the generated patterns are ordered by increasing latency"),
+ cl::init(true));
+#else
+static cl::opt<bool> VerifyPatternOrder(
+ "machine-combiner-verify-pattern-order", cl::Hidden,
+ cl::desc(
+ "Verify that the generated patterns are ordered by increasing latency"),
+ cl::init(false));
+#endif
+
namespace {
class MachineCombiner : public MachineFunctionPass {
+ const TargetSubtargetInfo *STI;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
MCSchedModel SchedModel;
@@ -85,6 +104,14 @@ private:
SmallVectorImpl<MachineInstr *> &DelInstrs);
void instr2instrSC(SmallVectorImpl<MachineInstr *> &Instrs,
SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC);
+ std::pair<unsigned, unsigned>
+ getLatenciesForInstrSequences(MachineInstr &MI,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ MachineTraceMetrics::Trace BlockTrace);
+
+ void verifyPatternOrder(MachineBasicBlock *MBB, MachineInstr &Root,
+ SmallVector<MachineCombinerPattern, 16> &Patterns);
};
}
@@ -140,9 +167,6 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
// are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth
for (auto *InstrPtr : InsInstrs) { // for each Use
unsigned IDepth = 0;
- DEBUG(dbgs() << "NEW INSTR ";
- InstrPtr->print(dbgs(), TII);
- dbgs() << "\n";);
for (const MachineOperand &MO : InstrPtr->operands()) {
// Check for virtual register operand.
if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
@@ -242,6 +266,29 @@ static CombinerObjective getCombinerObjective(MachineCombinerPattern P) {
}
}
+/// Estimate the latency of the new and original instruction sequence by summing
+/// up the latencies of the inserted and deleted instructions. This assumes
+/// that the inserted and deleted instructions are dependent instruction chains,
+/// which might not hold in all cases.
+std::pair<unsigned, unsigned> MachineCombiner::getLatenciesForInstrSequences(
+ MachineInstr &MI, SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ MachineTraceMetrics::Trace BlockTrace) {
+ assert(!InsInstrs.empty() && "Only support sequences that insert instrs.");
+ unsigned NewRootLatency = 0;
+ // NewRoot is the last instruction in the \p InsInstrs vector.
+ MachineInstr *NewRoot = InsInstrs.back();
+ for (unsigned i = 0; i < InsInstrs.size() - 1; i++)
+ NewRootLatency += TSchedModel.computeInstrLatency(InsInstrs[i]);
+ NewRootLatency += getLatency(&MI, NewRoot, BlockTrace);
+
+ unsigned RootLatency = 0;
+ for (auto I : DelInstrs)
+ RootLatency += TSchedModel.computeInstrLatency(I);
+
+ return {NewRootLatency, RootLatency};
+}
+
/// The DAGCombine code sequence ends in MI (Machine Instruction) Root.
/// The new code sequence ends in MI NewRoot. A necessary condition for the new
/// sequence to replace the old sequence is that it cannot lengthen the critical
@@ -257,56 +304,50 @@ bool MachineCombiner::improvesCriticalPathLen(
bool SlackIsAccurate) {
assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
"Missing machine model\n");
- // NewRoot is the last instruction in the \p InsInstrs vector.
- unsigned NewRootIdx = InsInstrs.size() - 1;
- MachineInstr *NewRoot = InsInstrs[NewRootIdx];
-
// Get depth and latency of NewRoot and Root.
unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
unsigned RootDepth = BlockTrace.getInstrCycles(*Root).Depth;
- DEBUG(dbgs() << "DEPENDENCE DATA FOR " << *Root << "\n";
- dbgs() << " NewRootDepth: " << NewRootDepth << "\n";
- dbgs() << " RootDepth: " << RootDepth << "\n");
+ LLVM_DEBUG(dbgs() << " Dependence data for " << *Root << "\tNewRootDepth: "
+ << NewRootDepth << "\tRootDepth: " << RootDepth);
// For a transform such as reassociation, the cost equation is
// conservatively calculated so that we must improve the depth (data
// dependency cycles) in the critical path to proceed with the transform.
// Being conservative also protects against inaccuracies in the underlying
// machine trace metrics and CPU models.
- if (getCombinerObjective(Pattern) == CombinerObjective::MustReduceDepth)
+ if (getCombinerObjective(Pattern) == CombinerObjective::MustReduceDepth) {
+ LLVM_DEBUG(dbgs() << "\tIt MustReduceDepth ");
+ LLVM_DEBUG(NewRootDepth < RootDepth
+ ? dbgs() << "\t and it does it\n"
+ : dbgs() << "\t but it does NOT do it\n");
return NewRootDepth < RootDepth;
+ }
// A more flexible cost calculation for the critical path includes the slack
// of the original code sequence. This may allow the transform to proceed
// even if the instruction depths (data dependency cycles) become worse.
// Account for the latency of the inserted and deleted instructions by
- // adding up their latencies. This assumes that the inserted and deleted
- // instructions are dependent instruction chains, which might not hold
- // in all cases.
- unsigned NewRootLatency = 0;
- for (unsigned i = 0; i < InsInstrs.size() - 1; i++)
- NewRootLatency += TSchedModel.computeInstrLatency(InsInstrs[i]);
- NewRootLatency += getLatency(Root, NewRoot, BlockTrace);
-
- unsigned RootLatency = 0;
- for (auto I : DelInstrs)
- RootLatency += TSchedModel.computeInstrLatency(I);
+ unsigned NewRootLatency, RootLatency;
+ std::tie(NewRootLatency, RootLatency) =
+ getLatenciesForInstrSequences(*Root, InsInstrs, DelInstrs, BlockTrace);
unsigned RootSlack = BlockTrace.getInstrSlack(*Root);
unsigned NewCycleCount = NewRootDepth + NewRootLatency;
- unsigned OldCycleCount = RootDepth + RootLatency +
- (SlackIsAccurate ? RootSlack : 0);
- DEBUG(dbgs() << " NewRootLatency: " << NewRootLatency << "\n";
- dbgs() << " RootLatency: " << RootLatency << "\n";
- dbgs() << " RootSlack: " << RootSlack << " SlackIsAccurate="
- << SlackIsAccurate << "\n";
- dbgs() << " NewRootDepth + NewRootLatency = "
- << NewCycleCount << "\n";
- dbgs() << " RootDepth + RootLatency + RootSlack = "
- << OldCycleCount << "\n";
- );
+ unsigned OldCycleCount =
+ RootDepth + RootLatency + (SlackIsAccurate ? RootSlack : 0);
+ LLVM_DEBUG(dbgs() << "\n\tNewRootLatency: " << NewRootLatency
+ << "\tRootLatency: " << RootLatency << "\n\tRootSlack: "
+ << RootSlack << " SlackIsAccurate=" << SlackIsAccurate
+ << "\n\tNewRootDepth + NewRootLatency = " << NewCycleCount
+ << "\n\tRootDepth + RootLatency + RootSlack = "
+ << OldCycleCount;);
+ LLVM_DEBUG(NewCycleCount <= OldCycleCount
+ ? dbgs() << "\n\t It IMPROVES PathLen because"
+ : dbgs() << "\n\t It DOES NOT improve PathLen because");
+ LLVM_DEBUG(dbgs() << "\n\t\tNewCycleCount = " << NewCycleCount
+ << ", OldCycleCount = " << OldCycleCount << "\n");
return NewCycleCount <= OldCycleCount;
}
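
A standalone sketch of the acceptance test above, with hypothetical cycle counts standing in for the machine trace metrics:

#include <cstdio>

// Standalone sketch, made-up numbers: keep the new sequence only if its cycle
// count does not exceed the old one, optionally crediting the root's slack.
static bool improvesPathLen(unsigned NewRootDepth, unsigned NewRootLatency,
                            unsigned RootDepth, unsigned RootLatency,
                            unsigned RootSlack, bool SlackIsAccurate) {
  unsigned NewCycleCount = NewRootDepth + NewRootLatency;
  unsigned OldCycleCount =
      RootDepth + RootLatency + (SlackIsAccurate ? RootSlack : 0);
  return NewCycleCount <= OldCycleCount;
}

int main() {
  // E.g. a two-instruction chain (latency 4 + 3) folded into one (latency 6).
  std::printf("%d\n", improvesPathLen(3, 6, 3, 7, 0, true)); // prints 1
}
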
@@ -352,9 +393,14 @@ bool MachineCombiner::preservesResourceLen(
unsigned ResLenAfterCombine =
BlockTrace.getResourceLength(MBBarr, MSCInsArr, MSCDelArr);
- DEBUG(dbgs() << "RESOURCE DATA: \n";
- dbgs() << " resource len before: " << ResLenBeforeCombine
- << " after: " << ResLenAfterCombine << "\n";);
+ LLVM_DEBUG(dbgs() << "\t\tResource length before replacement: "
+ << ResLenBeforeCombine
+ << " and after: " << ResLenAfterCombine << "\n";);
+ LLVM_DEBUG(
+ ResLenAfterCombine <= ResLenBeforeCombine
+ ? dbgs() << "\t\t As result it IMPROVES/PRESERVES Resource Length\n"
+ : dbgs() << "\t\t As result it DOES NOT improve/preserve Resource "
+ "Length\n");
return ResLenAfterCombine <= ResLenBeforeCombine;
}
@@ -409,6 +455,35 @@ static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI,
NumInstCombined++;
}
+// Check that the difference between original and new latency is decreasing for
+// later patterns. This helps to discover sub-optimal pattern orderings.
+void MachineCombiner::verifyPatternOrder(
+ MachineBasicBlock *MBB, MachineInstr &Root,
+ SmallVector<MachineCombinerPattern, 16> &Patterns) {
+ long PrevLatencyDiff = std::numeric_limits<long>::max();
+ (void)PrevLatencyDiff; // Variable is used in assert only.
+ for (auto P : Patterns) {
+ SmallVector<MachineInstr *, 16> InsInstrs;
+ SmallVector<MachineInstr *, 16> DelInstrs;
+ DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
+ TII->genAlternativeCodeSequence(Root, P, InsInstrs, DelInstrs,
+ InstrIdxForVirtReg);
+ // Found pattern, but did not generate alternative sequence.
+ // This can happen e.g. when an immediate could not be materialized
+ // in a single instruction.
+ if (InsInstrs.empty() || !TSchedModel.hasInstrSchedModelOrItineraries())
+ continue;
+
+ unsigned NewRootLatency, RootLatency;
+ std::tie(NewRootLatency, RootLatency) = getLatenciesForInstrSequences(
+ Root, InsInstrs, DelInstrs, MinInstr->getTrace(MBB));
+ long CurrentLatencyDiff = ((long)RootLatency) - ((long)NewRootLatency);
+ assert(CurrentLatencyDiff <= PrevLatencyDiff &&
+ "Current pattern is better than previous pattern.");
+ PrevLatencyDiff = CurrentLatencyDiff;
+ }
+}
+
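
A standalone sketch of the monotonicity check performed by verifyPatternOrder, assuming the per-pattern latency savings have already been computed:

#include <cassert>
#include <limits>
#include <vector>

// Standalone sketch: assert that each pattern's latency saving is no larger
// than the previous pattern's, i.e. the list is ordered best-first.
static void checkOrdering(const std::vector<long> &LatencySavings) {
  long Prev = std::numeric_limits<long>::max();
  for (long Saving : LatencySavings) {
    assert(Saving <= Prev && "later pattern saves more latency");
    Prev = Saving;
  }
}

int main() {
  checkOrdering({5, 3, 3, 1}); // non-increasing savings: all asserts pass
}
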
/// Substitute a slow code sequence with a faster one by
/// evaluating instruction combining pattern.
/// The prototype of such a pattern is MUl + ADD -> MADD. Performs instruction
@@ -418,7 +493,7 @@ static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI,
/// sequence is shorter.
bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
bool Changed = false;
- DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n");
+ LLVM_DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n");
bool IncrementalUpdate = false;
auto BlockIter = MBB->begin();
@@ -433,8 +508,6 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
while (BlockIter != MBB->end()) {
auto &MI = *BlockIter++;
-
- DEBUG(dbgs() << "INSTR "; MI.dump(); dbgs() << "\n";);
SmallVector<MachineCombinerPattern, 16> Patterns;
// The motivating example is:
//
@@ -459,11 +532,16 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
// The algorithm does not try to evaluate all patterns and pick the best.
// This is only an artificial restriction though. In practice there is
// mostly one pattern, and getMachineCombinerPatterns() can order patterns
- // based on an internal cost heuristic.
+ // based on an internal cost heuristic. If
+ // machine-combiner-verify-pattern-order is enabled, all patterns are
+ // checked to ensure later patterns do not provide better latency savings.
if (!TII->getMachineCombinerPatterns(MI, Patterns))
continue;
+ if (VerifyPatternOrder)
+ verifyPatternOrder(MBB, MI, Patterns);
+
for (auto P : Patterns) {
SmallVector<MachineInstr *, 16> InsInstrs;
SmallVector<MachineInstr *, 16> DelInstrs;
@@ -478,6 +556,19 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
if (!NewInstCount)
continue;
+ LLVM_DEBUG(if (dump_intrs) {
+ dbgs() << "\tFor the Pattern (" << (int)P << ") these instructions could be removed\n";
+ for (auto const *InstrPtr : DelInstrs) {
+ dbgs() << "\t\t" << STI->getSchedInfoStr(*InstrPtr) << ": ";
+ InstrPtr->print(dbgs(), false, false, false, TII);
+ }
+ dbgs() << "\tThese instructions could replace the removed ones\n";
+ for (auto const *InstrPtr : InsInstrs) {
+ dbgs() << "\t\t" << STI->getSchedInfoStr(*InstrPtr) << ": ";
+ InstrPtr->print(dbgs(), false, false, false, TII);
+ }
+ });
+
bool SubstituteAlways = false;
if (ML && TII->isThroughputPattern(P))
SubstituteAlways = true;
@@ -539,20 +630,22 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
}
bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
- const TargetSubtargetInfo &STI = MF.getSubtarget();
- TII = STI.getInstrInfo();
- TRI = STI.getRegisterInfo();
- SchedModel = STI.getSchedModel();
- TSchedModel.init(SchedModel, &STI, TII);
+ STI = &MF.getSubtarget();
+ TII = STI->getInstrInfo();
+ TRI = STI->getRegisterInfo();
+ SchedModel = STI->getSchedModel();
+ TSchedModel.init(STI);
MRI = &MF.getRegInfo();
MLI = &getAnalysis<MachineLoopInfo>();
Traces = &getAnalysis<MachineTraceMetrics>();
MinInstr = nullptr;
OptSize = MF.getFunction().optForSize();
- DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n');
+ LLVM_DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n');
if (!TII->useMachineCombiner()) {
- DEBUG(dbgs() << " Skipping pass: Target does not support machine combiner\n");
+ LLVM_DEBUG(
+ dbgs()
+ << " Skipping pass: Target does not support machine combiner\n");
return false;
}
diff --git a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index fcec05adc732..3bf8147a06c3 100644
--- a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -9,6 +9,35 @@
//
// This is an extremely simple MachineInstr-level copy propagation pass.
//
+// This pass forwards the source of COPYs to the users of their destinations
+// when doing so is legal. For example:
+//
+// %reg1 = COPY %reg0
+// ...
+// ... = OP %reg1
+//
+// If
+// - %reg0 has not been clobbered by the time of the use of %reg1
+// - the register class constraints are satisfied
+// - the COPY def is the only value that reaches OP
+// then this pass replaces the above with:
+//
+// %reg1 = COPY %reg0
+// ...
+// ... = OP %reg0
+//
+// This pass also removes some redundant COPYs. For example:
+//
+// %R1 = COPY %R0
+// ... // No clobber of %R1
+// %R0 = COPY %R1 <<< Removed
+//
+// or
+//
+// %R1 = COPY %R0
+// ... // No clobber of %R0
+// %R1 = COPY %R0 <<< Removed
+//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/DenseMap.h"
@@ -23,11 +52,13 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <iterator>
@@ -37,6 +68,9 @@ using namespace llvm;
#define DEBUG_TYPE "machine-cp"
STATISTIC(NumDeletes, "Number of dead copies deleted");
+STATISTIC(NumCopyForwards, "Number of copy uses forwarded");
+DEBUG_COUNTER(FwdCounter, "machine-cp-fwd",
+ "Controls which register COPYs are forwarded");
namespace {
@@ -73,6 +107,10 @@ using Reg2MIMap = DenseMap<unsigned, MachineInstr *>;
void ReadRegister(unsigned Reg);
void CopyPropagateBlock(MachineBasicBlock &MBB);
bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def);
+ void forwardUses(MachineInstr &MI);
+ bool isForwardableRegClassCopy(const MachineInstr &Copy,
+ const MachineInstr &UseI, unsigned UseIdx);
+ bool hasImplicitOverlap(const MachineInstr &MI, const MachineOperand &Use);
/// Candidates for deletion.
SmallSetVector<MachineInstr*, 8> MaybeDeadCopies;
@@ -143,7 +181,8 @@ void MachineCopyPropagation::ReadRegister(unsigned Reg) {
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
Reg2MIMap::iterator CI = CopyMap.find(*AI);
if (CI != CopyMap.end()) {
- DEBUG(dbgs() << "MCP: Copy is used - not dead: "; CI->second->dump());
+ LLVM_DEBUG(dbgs() << "MCP: Copy is used - not dead: ";
+ CI->second->dump());
MaybeDeadCopies.remove(CI->second);
}
}
@@ -191,7 +230,7 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src,
if (!isNopCopy(PrevCopy, Src, Def, TRI))
return false;
- DEBUG(dbgs() << "MCP: copy is a NOP, removing: "; Copy.dump());
+ LLVM_DEBUG(dbgs() << "MCP: copy is a NOP, removing: "; Copy.dump());
// Copy was redundantly redefining either Src or Def. Remove earlier kill
// flags between Copy and PrevCopy because the value will be reused now.
@@ -208,14 +247,163 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src,
return true;
}
+/// Decide whether we should forward the source of \param Copy to its use in
+/// \param UseI based on the physical register class constraints of the opcode,
+/// while avoiding the introduction of more cross-class COPYs.
+bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy,
+ const MachineInstr &UseI,
+ unsigned UseIdx) {
+
+ unsigned CopySrcReg = Copy.getOperand(1).getReg();
+
+ // If the new register meets the opcode register constraints, then allow
+ // forwarding.
+ if (const TargetRegisterClass *URC =
+ UseI.getRegClassConstraint(UseIdx, TII, TRI))
+ return URC->contains(CopySrcReg);
+
+ if (!UseI.isCopy())
+ return false;
+
+ /// COPYs don't have register class constraints, so if the user instruction
+ /// is a COPY, we just try to avoid introducing additional cross-class
+ /// COPYs. For example:
+ ///
+ /// RegClassA = COPY RegClassB // Copy parameter
+ /// ...
+ /// RegClassB = COPY RegClassA // UseI parameter
+ ///
+ /// which after forwarding becomes
+ ///
+ /// RegClassA = COPY RegClassB
+ /// ...
+ /// RegClassB = COPY RegClassB
+ ///
+ /// so we have reduced the number of cross-class COPYs and potentially
+ /// introduced a nop COPY that can be removed.
+ const TargetRegisterClass *UseDstRC =
+ TRI->getMinimalPhysRegClass(UseI.getOperand(0).getReg());
+
+ const TargetRegisterClass *SuperRC = UseDstRC;
+ for (TargetRegisterClass::sc_iterator SuperRCI = UseDstRC->getSuperClasses();
+ SuperRC; SuperRC = *SuperRCI++)
+ if (SuperRC->contains(CopySrcReg))
+ return true;
+
+ return false;
+}
+
+/// Check that \p MI does not have implicit uses that overlap with it's \p Use
+/// operand (the register being replaced), since these can sometimes be
+/// implicitly tied to other operands. For example, on AMDGPU:
+///
+/// V_MOVRELS_B32_e32 %VGPR2, %M0<imp-use>, %EXEC<imp-use>, %VGPR2_VGPR3_VGPR4_VGPR5<imp-use>
+///
+/// the %VGPR2 is implicitly tied to the larger reg operand, but we have no
+/// way of knowing we need to update the latter when updating the former.
+bool MachineCopyPropagation::hasImplicitOverlap(const MachineInstr &MI,
+ const MachineOperand &Use) {
+ for (const MachineOperand &MIUse : MI.uses())
+ if (&MIUse != &Use && MIUse.isReg() && MIUse.isImplicit() &&
+ MIUse.isUse() && TRI->regsOverlap(Use.getReg(), MIUse.getReg()))
+ return true;
+
+ return false;
+}
+
+/// Look for available copies whose destination register is used by \p MI and
+/// replace the use in \p MI with the copy's source register.
+void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
+ if (AvailCopyMap.empty())
+ return;
+
+ // Look for non-tied explicit register uses whose register is the
+ // destination of an active COPY instruction, and replace the use with the
+ // COPY's source register.
+ for (unsigned OpIdx = 0, OpEnd = MI.getNumOperands(); OpIdx < OpEnd;
+ ++OpIdx) {
+ MachineOperand &MOUse = MI.getOperand(OpIdx);
+ // Don't forward into undef use operands, since doing so can confuse the
+ // machine verifier: it does not treat undef reads as reads, so forwarding
+ // could create a live range that ends at an undef read, which the verifier
+ // then reports as a live range that does not end at a read of the
+ // live-range register.
+ if (!MOUse.isReg() || MOUse.isTied() || MOUse.isUndef() || MOUse.isDef() ||
+ MOUse.isImplicit())
+ continue;
+
+ if (!MOUse.getReg())
+ continue;
+
+ // Check that the register is marked 'renamable' so we know it is safe to
+ // rename it without violating any constraints that aren't expressed in the
+ // IR (e.g. ABI or opcode requirements).
+ if (!MOUse.isRenamable())
+ continue;
+
+ auto CI = AvailCopyMap.find(MOUse.getReg());
+ if (CI == AvailCopyMap.end())
+ continue;
+
+ MachineInstr &Copy = *CI->second;
+ unsigned CopyDstReg = Copy.getOperand(0).getReg();
+ const MachineOperand &CopySrc = Copy.getOperand(1);
+ unsigned CopySrcReg = CopySrc.getReg();
+
+ // FIXME: Don't handle partial uses of wider COPYs yet.
+ if (MOUse.getReg() != CopyDstReg) {
+ LLVM_DEBUG(
+ dbgs() << "MCP: FIXME! Not forwarding COPY to sub-register use:\n "
+ << MI);
+ continue;
+ }
+
+ // Don't forward COPYs of reserved regs unless they are constant.
+ if (MRI->isReserved(CopySrcReg) && !MRI->isConstantPhysReg(CopySrcReg))
+ continue;
+
+ if (!isForwardableRegClassCopy(Copy, MI, OpIdx))
+ continue;
+
+ if (hasImplicitOverlap(MI, MOUse))
+ continue;
+
+ if (!DebugCounter::shouldExecute(FwdCounter)) {
+ LLVM_DEBUG(dbgs() << "MCP: Skipping forwarding due to debug counter:\n "
+ << MI);
+ continue;
+ }
+
+ LLVM_DEBUG(dbgs() << "MCP: Replacing " << printReg(MOUse.getReg(), TRI)
+ << "\n with " << printReg(CopySrcReg, TRI)
+ << "\n in " << MI << " from " << Copy);
+
+ MOUse.setReg(CopySrcReg);
+ if (!CopySrc.isRenamable())
+ MOUse.setIsRenamable(false);
+
+ LLVM_DEBUG(dbgs() << "MCP: After replacement: " << MI << "\n");
+
+ // Clear kill markers that may have been invalidated.
+ for (MachineInstr &KMI :
+ make_range(Copy.getIterator(), std::next(MI.getIterator())))
+ KMI.clearRegisterKills(CopySrcReg, TRI);
+
+ ++NumCopyForwards;
+ Changed = true;
+ }
+}
+
void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
- DEBUG(dbgs() << "MCP: CopyPropagateBlock " << MBB.getName() << "\n");
+ LLVM_DEBUG(dbgs() << "MCP: CopyPropagateBlock " << MBB.getName() << "\n");
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) {
MachineInstr *MI = &*I;
++I;
- if (MI->isCopy()) {
+ // Analyze copies (which don't overlap themselves).
+ if (MI->isCopy() && !TRI->regsOverlap(MI->getOperand(0).getReg(),
+ MI->getOperand(1).getReg())) {
unsigned Def = MI->getOperand(0).getReg();
unsigned Src = MI->getOperand(1).getReg();
@@ -241,6 +429,11 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
if (eraseIfRedundant(*MI, Def, Src) || eraseIfRedundant(*MI, Src, Def))
continue;
+ forwardUses(*MI);
+
+ // Src may have been changed by forwardUses()
+ Src = MI->getOperand(1).getReg();
+
// If Src is defined by a previous copy, the previous copy cannot be
// eliminated.
ReadRegister(Src);
@@ -253,7 +446,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
ReadRegister(Reg);
}
- DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI->dump());
+ LLVM_DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI->dump());
// Copy is now a candidate for deletion.
if (!MRI->isReserved(Def))
@@ -292,6 +485,20 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
continue;
}
+ // Clobber any earlyclobber regs first.
+ for (const MachineOperand &MO : MI->operands())
+ if (MO.isReg() && MO.isEarlyClobber()) {
+ unsigned Reg = MO.getReg();
+ // If we have a tied earlyclobber, that means it is also read by this
+ // instruction, so we need to make sure we don't remove it as dead
+ // later.
+ if (MO.isTied())
+ ReadRegister(Reg);
+ ClobberRegister(Reg);
+ }
+
+ forwardUses(*MI);
+
// Not a copy.
SmallVector<unsigned, 2> Defs;
const MachineOperand *RegMask = nullptr;
@@ -307,10 +514,10 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
assert(!TargetRegisterInfo::isVirtualRegister(Reg) &&
"MachineCopyPropagation should be run after register allocation!");
- if (MO.isDef()) {
+ if (MO.isDef() && !MO.isEarlyClobber()) {
Defs.push_back(Reg);
continue;
- } else if (MO.readsReg())
+ } else if (!MO.isDebug() && MO.readsReg())
ReadRegister(Reg);
}
@@ -331,8 +538,8 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
continue;
}
- DEBUG(dbgs() << "MCP: Removing copy due to regmask clobbering: ";
- MaybeDead->dump());
+ LLVM_DEBUG(dbgs() << "MCP: Removing copy due to regmask clobbering: ";
+ MaybeDead->dump());
// erase() will return the next valid iterator pointing to the next
// element after the erased one.
@@ -364,6 +571,8 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
// since we don't want to trust live-in lists.
if (MBB.succ_empty()) {
for (MachineInstr *MaybeDead : MaybeDeadCopies) {
+ LLVM_DEBUG(dbgs() << "MCP: Removing copy due to no live-out succ: ";
+ MaybeDead->dump());
assert(!MRI->isReserved(MaybeDead->getOperand(0).getReg()));
MaybeDead->eraseFromParent();
Changed = true;
diff --git a/contrib/llvm/lib/CodeGen/MachineDominators.cpp b/contrib/llvm/lib/CodeGen/MachineDominators.cpp
index 517ac29b6450..6b2802626456 100644
--- a/contrib/llvm/lib/CodeGen/MachineDominators.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineDominators.cpp
@@ -65,8 +65,21 @@ void MachineDominatorTree::releaseMemory() {
}
void MachineDominatorTree::verifyAnalysis() const {
- if (DT && VerifyMachineDomInfo)
- verifyDomTree();
+ if (DT && VerifyMachineDomInfo) {
+ MachineFunction &F = *getRoot()->getParent();
+
+ DomTreeBase<MachineBasicBlock> OtherDT;
+ OtherDT.recalculate(F);
+ if (getRootNode()->getBlock() != OtherDT.getRootNode()->getBlock() ||
+ DT->compare(OtherDT)) {
+ errs() << "MachineDominatorTree for function " << F.getName()
+ << " is not up to date!\nComputed:\n";
+ DT->print(errs());
+ errs() << "\nActual:\n";
+ OtherDT.print(errs());
+ abort();
+ }
+ }
}
void MachineDominatorTree::print(raw_ostream &OS, const Module*) const {
@@ -138,21 +151,3 @@ void MachineDominatorTree::applySplitCriticalEdges() const {
NewBBs.clear();
CriticalEdgesToSplit.clear();
}
-
-void MachineDominatorTree::verifyDomTree() const {
- if (!DT)
- return;
- MachineFunction &F = *getRoot()->getParent();
-
- DomTreeBase<MachineBasicBlock> OtherDT;
- OtherDT.recalculate(F);
- if (getRootNode()->getBlock() != OtherDT.getRootNode()->getBlock() ||
- DT->compare(OtherDT)) {
- errs() << "MachineDominatorTree for function " << F.getName()
- << " is not up to date!\nComputed:\n";
- DT->print(errs());
- errs() << "\nActual:\n";
- OtherDT.print(errs());
- abort();
- }
-}
diff --git a/contrib/llvm/lib/CodeGen/MachineFrameInfo.cpp b/contrib/llvm/lib/CodeGen/MachineFrameInfo.cpp
index 2aa9d6b816c8..0b316871dbdf 100644
--- a/contrib/llvm/lib/CodeGen/MachineFrameInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFrameInfo.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
@@ -40,9 +41,9 @@ static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align,
unsigned StackAlign) {
if (!ShouldClamp || Align <= StackAlign)
return Align;
- DEBUG(dbgs() << "Warning: requested alignment " << Align
- << " exceeds the stack alignment " << StackAlign
- << " when stack realignment is off" << '\n');
+ LLVM_DEBUG(dbgs() << "Warning: requested alignment " << Align
+ << " exceeds the stack alignment " << StackAlign
+ << " when stack realignment is off" << '\n');
return StackAlign;
}
@@ -217,7 +218,7 @@ void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
OS << " fi#" << (int)(i-NumFixedObjects) << ": ";
if (SO.StackID != 0)
- OS << "id=" << SO.StackID << ' ';
+ OS << "id=" << static_cast<unsigned>(SO.StackID) << ' ';
if (SO.Size == ~0ULL) {
OS << "dead\n";
diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
index bc8eb1429d92..dd668bcf6193 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
@@ -37,7 +37,9 @@
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/WasmEHFuncInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
@@ -174,6 +176,11 @@ void MachineFunction::init() {
WinEHInfo = new (Allocator) WinEHFuncInfo();
}
+ if (isScopedEHPersonality(classifyEHPersonality(
+ F.hasPersonalityFn() ? F.getPersonalityFn() : nullptr))) {
+ WasmEHInfo = new (Allocator) WasmEHFuncInfo();
+ }
+
assert(Target.isCompatibleDataLayout(getDataLayout()) &&
"Can't create a MachineFunction using a Module with a "
"Target-incompatible DataLayout attached\n");
@@ -195,6 +202,7 @@ void MachineFunction::clear() {
// Do call MachineBasicBlock destructors, it contains std::vectors.
for (iterator I = begin(), E = end(); I != E; I = BasicBlocks.erase(I))
I->Insts.clearAndLeakNodesUnsafely();
+ MBBNumbering.clear();
InstructionRecycler.clear(Allocator);
OperandRecycler.clear(Allocator);
@@ -478,6 +486,14 @@ const char *MachineFunction::createExternalSymbolName(StringRef Name) {
return Dest;
}
+uint32_t *MachineFunction::allocateRegMask() {
+ unsigned NumRegs = getSubtarget().getRegisterInfo()->getNumRegs();
+ unsigned Size = MachineOperand::getRegMaskSize(NumRegs);
+ uint32_t *Mask = Allocator.Allocate<uint32_t>(Size);
+ memset(Mask, 0, Size * sizeof(Mask[0]));
+ return Mask;
+}
+
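As a rough usage sketch for the new allocateRegMask() helper (not taken from this change; MF, MIB and PreservedRegs are assumed names), the returned mask starts out all-clobber and a caller sets one bit per register that is preserved:

  uint32_t *Mask = MF.allocateRegMask();
  for (MCPhysReg R : PreservedRegs)   // hypothetical list of preserved registers
    Mask[R / 32] |= 1u << (R % 32);   // a set bit means "preserved", not clobbered
  MIB.addRegMask(Mask);               // attach to e.g. a call instruction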
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MachineFunction::dump() const {
print(dbgs());
@@ -522,7 +538,8 @@ void MachineFunction::print(raw_ostream &OS, const SlotIndexes *Indexes) const {
MST.incorporateFunction(getFunction());
for (const auto &BB : *this) {
OS << '\n';
- BB.print(OS, MST, Indexes);
+ // If we print the whole function, print it at its most verbose level.
+ BB.print(OS, MST, Indexes, /*IsStandalone=*/true);
}
OS << "\n# End machine code for function " << getName() << ".\n\n";
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
index 5ffe33006131..67ac95740e3e 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
@@ -24,7 +24,6 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
@@ -85,7 +84,6 @@ void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<MemoryDependenceWrapperPass>();
AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addPreserved<SCEVAAWrapperPass>();
- AU.addPreserved<StackProtector>();
FunctionPass::getAnalysisUsage(AU);
}
diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
index 14655c6eb700..96fcfdb72ad7 100644
--- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
@@ -37,6 +37,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
@@ -74,6 +75,29 @@
using namespace llvm;
+static const MachineFunction *getMFIfAvailable(const MachineInstr &MI) {
+ if (const MachineBasicBlock *MBB = MI.getParent())
+ if (const MachineFunction *MF = MBB->getParent())
+ return MF;
+ return nullptr;
+}
+
+// Try to crawl up to the machine function and get TRI and IntrinsicInfo from
+// it.
+static void tryToGetTargetInfo(const MachineInstr &MI,
+ const TargetRegisterInfo *&TRI,
+ const MachineRegisterInfo *&MRI,
+ const TargetIntrinsicInfo *&IntrinsicInfo,
+ const TargetInstrInfo *&TII) {
+
+ if (const MachineFunction *MF = getMFIfAvailable(MI)) {
+ TRI = MF->getSubtarget().getRegisterInfo();
+ MRI = &MF->getRegInfo();
+ IntrinsicInfo = MF->getTarget().getIntrinsicInfo();
+ TII = MF->getSubtarget().getInstrInfo();
+ }
+}
+
void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) {
if (MCID->ImplicitDefs)
for (const MCPhysReg *ImpDefs = MCID->getImplicitDefs(); *ImpDefs;
@@ -358,6 +382,12 @@ MachineInstr::mergeMemRefsWith(const MachineInstr& Other) {
return std::make_pair(MemBegin, CombinedNumMemRefs);
}
+uint16_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const {
+ // For now, just return the union of the flags. If the flags get more
+ // complicated over time, we might need more logic here.
+ return getFlags() | Other.getFlags();
+}
+
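A plausible call site for the new mergeFlagsWith() helper, sketched under the assumption of a combine that replaces two instructions MI1 and MI2 with a single NewMI (all three names are hypothetical):

  // Keep the union of the MI flags (frame-setup, fast-math flags, ...) on the
  // instruction that replaces the two originals.
  NewMI->setFlags(MI1->mergeFlagsWith(*MI2));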
bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const {
assert(!isBundledWithPred() && "Must be called on bundle header");
for (MachineBasicBlock::const_instr_iterator MII = getIterator();; ++MII) {
@@ -437,8 +467,8 @@ bool MachineInstr::isIdenticalTo(const MachineInstr &Other,
return false;
}
}
- // If DebugLoc does not match then two dbg.values are not identical.
- if (isDebugValue())
+ // If DebugLoc does not match then two debug instructions are not identical.
+ if (isDebugInstr())
if (getDebugLoc() && Other.getDebugLoc() &&
getDebugLoc() != Other.getDebugLoc())
return false;
@@ -489,21 +519,39 @@ void MachineInstr::eraseFromBundle() {
getParent()->erase_instr(this);
}
-/// getNumExplicitOperands - Returns the number of non-implicit operands.
-///
unsigned MachineInstr::getNumExplicitOperands() const {
unsigned NumOperands = MCID->getNumOperands();
if (!MCID->isVariadic())
return NumOperands;
- for (unsigned i = NumOperands, e = getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = getOperand(i);
- if (!MO.isReg() || !MO.isImplicit())
- NumOperands++;
+ for (unsigned I = NumOperands, E = getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = getOperand(I);
+ // The operands must always be in the following order:
+ // - explicit reg defs,
+ // - other explicit operands (reg uses, immediates, etc.),
+ // - implicit reg defs
+ // - implicit reg uses
+ if (MO.isReg() && MO.isImplicit())
+ break;
+ ++NumOperands;
}
return NumOperands;
}
+unsigned MachineInstr::getNumExplicitDefs() const {
+ unsigned NumDefs = MCID->getNumDefs();
+ if (!MCID->isVariadic())
+ return NumDefs;
+
+ for (unsigned I = NumDefs, E = getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = getOperand(I);
+ if (!MO.isReg() || !MO.isDef() || MO.isImplicit())
+ break;
+ ++NumDefs;
+ }
+ return NumDefs;
+}
+
void MachineInstr::bundleWithPred() {
assert(!isBundledWithPred() && "MI is already bundled with its predecessor");
setFlag(BundledPred);
@@ -583,6 +631,11 @@ int MachineInstr::findInlineAsmFlagIdx(unsigned OpIdx,
return -1;
}
+const DILabel *MachineInstr::getDebugLabel() const {
+ assert(isDebugLabel() && "not a DBG_LABEL");
+ return cast<DILabel>(getOperand(0).getMetadata());
+}
+
const DILocalVariable *MachineInstr::getDebugVariable() const {
assert(isDebugValue() && "not a DBG_VALUE");
return cast<DILocalVariable>(getOperand(2).getMetadata());
@@ -905,8 +958,7 @@ void MachineInstr::clearKillInfo() {
}
}
-void MachineInstr::substituteRegister(unsigned FromReg,
- unsigned ToReg,
+void MachineInstr::substituteRegister(unsigned FromReg, unsigned ToReg,
unsigned SubIdx,
const TargetRegisterInfo &RegInfo) {
if (TargetRegisterInfo::isPhysicalRegister(ToReg)) {
@@ -941,7 +993,7 @@ bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const {
return false;
}
- if (isPosition() || isDebugValue() || isTerminator() ||
+ if (isPosition() || isDebugInstr() || isTerminator() ||
hasUnmodeledSideEffects())
return false;
@@ -1195,8 +1247,12 @@ LLT MachineInstr::getTypeToPrint(unsigned OpIdx, SmallBitVector &PrintedTypes,
if (PrintedTypes[OpInfo.getGenericTypeIndex()])
return LLT{};
- PrintedTypes.set(OpInfo.getGenericTypeIndex());
- return MRI.getType(Op.getReg());
+ LLT TypeToPrint = MRI.getType(Op.getReg());
+ // Don't mark the type index printed if it wasn't actually printed: maybe
+ // another operand with the same type index has an actual type attached:
+ if (TypeToPrint.isValid())
+ PrintedTypes.set(OpInfo.getGenericTypeIndex());
+ return TypeToPrint;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1206,39 +1262,36 @@ LLVM_DUMP_METHOD void MachineInstr::dump() const {
}
#endif
-void MachineInstr::print(raw_ostream &OS, bool SkipOpers, bool SkipDebugLoc,
+void MachineInstr::print(raw_ostream &OS, bool IsStandalone, bool SkipOpers,
+ bool SkipDebugLoc, bool AddNewLine,
const TargetInstrInfo *TII) const {
const Module *M = nullptr;
- if (const MachineBasicBlock *MBB = getParent())
- if (const MachineFunction *MF = MBB->getParent())
- M = MF->getFunction().getParent();
+ const Function *F = nullptr;
+ if (const MachineFunction *MF = getMFIfAvailable(*this)) {
+ F = &MF->getFunction();
+ M = F->getParent();
+ if (!TII)
+ TII = MF->getSubtarget().getInstrInfo();
+ }
ModuleSlotTracker MST(M);
- print(OS, MST, SkipOpers, SkipDebugLoc, TII);
+ if (F)
+ MST.incorporateFunction(*F);
+ print(OS, MST, IsStandalone, SkipOpers, SkipDebugLoc, TII);
}
void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
- bool SkipOpers, bool SkipDebugLoc,
- const TargetInstrInfo *TII) const {
+ bool IsStandalone, bool SkipOpers, bool SkipDebugLoc,
+ bool AddNewLine, const TargetInstrInfo *TII) const {
// We can be a bit tidier if we know the MachineFunction.
const MachineFunction *MF = nullptr;
const TargetRegisterInfo *TRI = nullptr;
const MachineRegisterInfo *MRI = nullptr;
const TargetIntrinsicInfo *IntrinsicInfo = nullptr;
+ tryToGetTargetInfo(*this, TRI, MRI, IntrinsicInfo, TII);
- if (const MachineBasicBlock *MBB = getParent()) {
- MF = MBB->getParent();
- if (MF) {
- MRI = &MF->getRegInfo();
- TRI = MF->getSubtarget().getRegisterInfo();
- if (!TII)
- TII = MF->getSubtarget().getInstrInfo();
- IntrinsicInfo = MF->getTarget().getIntrinsicInfo();
- }
- }
-
- // Save a list of virtual registers.
- SmallVector<unsigned, 8> VirtRegs;
+ if (isCFIInstruction())
+ assert(getNumOperands() == 1 && "Expected 1 operand in CFI instruction");
SmallBitVector PrintedTypes(8);
bool ShouldPrintRegisterTies = hasComplexRegisterTies();
@@ -1250,26 +1303,47 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
return findTiedOperandIdx(OpIdx);
return 0U;
};
+ unsigned StartOp = 0;
+ unsigned e = getNumOperands();
+
// Print explicitly defined operands on the left of an assignment syntax.
- unsigned StartOp = 0, e = getNumOperands();
- for (; StartOp < e && getOperand(StartOp).isReg() &&
- getOperand(StartOp).isDef() && !getOperand(StartOp).isImplicit();
- ++StartOp) {
+ while (StartOp < e) {
+ const MachineOperand &MO = getOperand(StartOp);
+ if (!MO.isReg() || !MO.isDef() || MO.isImplicit())
+ break;
+
if (StartOp != 0)
OS << ", ";
+
LLT TypeToPrint = MRI ? getTypeToPrint(StartOp, PrintedTypes, *MRI) : LLT{};
unsigned TiedOperandIdx = getTiedOperandIdx(StartOp);
- getOperand(StartOp).print(OS, MST, TypeToPrint, /*PrintDef=*/false,
- ShouldPrintRegisterTies, TiedOperandIdx, TRI,
- IntrinsicInfo);
- unsigned Reg = getOperand(StartOp).getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg))
- VirtRegs.push_back(Reg);
+ MO.print(OS, MST, TypeToPrint, /*PrintDef=*/false, IsStandalone,
+ ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo);
+ ++StartOp;
}
if (StartOp != 0)
OS << " = ";
+ if (getFlag(MachineInstr::FrameSetup))
+ OS << "frame-setup ";
+ if (getFlag(MachineInstr::FrameDestroy))
+ OS << "frame-destroy ";
+ if (getFlag(MachineInstr::FmNoNans))
+ OS << "nnan ";
+ if (getFlag(MachineInstr::FmNoInfs))
+ OS << "ninf ";
+ if (getFlag(MachineInstr::FmNsz))
+ OS << "nsz ";
+ if (getFlag(MachineInstr::FmArcp))
+ OS << "arcp ";
+ if (getFlag(MachineInstr::FmContract))
+ OS << "contract ";
+ if (getFlag(MachineInstr::FmAfn))
+ OS << "afn ";
+ if (getFlag(MachineInstr::FmReassoc))
+ OS << "reassoc ";
+
// Print the opcode name.
if (TII)
OS << TII->getName(getOpcode());
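With this change the MIR printer emits the flags as textual prefixes in front of the opcode, so a fast-math add might render roughly as %2:fpr64 = nnan ninf contract FADDDrr %0, %1; the opcode and register-class names in that example are illustrative, not taken from this patch.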
@@ -1290,7 +1364,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
const unsigned OpIdx = InlineAsm::MIOp_AsmString;
LLT TypeToPrint = MRI ? getTypeToPrint(OpIdx, PrintedTypes, *MRI) : LLT{};
unsigned TiedOperandIdx = getTiedOperandIdx(OpIdx);
- getOperand(OpIdx).print(OS, MST, TypeToPrint, /*PrintDef=*/true,
+ getOperand(OpIdx).print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone,
ShouldPrintRegisterTies, TiedOperandIdx, TRI,
IntrinsicInfo);
@@ -1318,18 +1392,9 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
- if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- VirtRegs.push_back(MO.getReg());
-
if (FirstOp) FirstOp = false; else OS << ",";
OS << " ";
- if (i < getDesc().NumOperands) {
- const MCOperandInfo &MCOI = getDesc().OpInfo[i];
- if (MCOI.isPredicate())
- OS << "pred:";
- if (MCOI.isOptionalDef())
- OS << "opt:";
- }
+
if (isDebugValue() && MO.isMetadata()) {
// Pretty print DBG_VALUE instructions.
auto *DIV = dyn_cast<DILocalVariable>(MO.getMetadata());
@@ -1338,12 +1403,20 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
else {
LLT TypeToPrint = MRI ? getTypeToPrint(i, PrintedTypes, *MRI) : LLT{};
unsigned TiedOperandIdx = getTiedOperandIdx(i);
- MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true,
+ MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone,
+ ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo);
+ }
+ } else if (isDebugLabel() && MO.isMetadata()) {
+ // Pretty print DBG_LABEL instructions.
+ auto *DIL = dyn_cast<DILabel>(MO.getMetadata());
+ if (DIL && !DIL->getName().empty())
+ OS << "\"" << DIL->getName() << '\"';
+ else {
+ LLT TypeToPrint = MRI ? getTypeToPrint(i, PrintedTypes, *MRI) : LLT{};
+ unsigned TiedOperandIdx = getTiedOperandIdx(i);
+ MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone,
ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo);
}
- } else if (TRI && (isInsertSubreg() || isRegSequence() ||
- (isSubregToReg() && i == 3)) && MO.isImm()) {
- OS << TRI->getSubRegIndexName(MO.getImm());
} else if (i == AsmDescOp && MO.isImm()) {
// Pretty print the inline asm operand descriptor.
OS << '$' << AsmOpCount++;
@@ -1406,77 +1479,66 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
LLT TypeToPrint = MRI ? getTypeToPrint(i, PrintedTypes, *MRI) : LLT{};
unsigned TiedOperandIdx = getTiedOperandIdx(i);
if (MO.isImm() && isOperandSubregIdx(i))
- MachineOperand::printSubregIdx(OS, MO.getImm(), TRI);
+ MachineOperand::printSubRegIdx(OS, MO.getImm(), TRI);
else
- MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true,
+ MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone,
ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo);
}
}
- bool HaveSemi = false;
- const unsigned PrintableFlags = FrameSetup | FrameDestroy;
- if (Flags & PrintableFlags) {
- if (!HaveSemi) {
- OS << ";";
- HaveSemi = true;
+ if (!SkipDebugLoc) {
+ if (const DebugLoc &DL = getDebugLoc()) {
+ if (!FirstOp)
+ OS << ',';
+ OS << " debug-location ";
+ DL->printAsOperand(OS, MST);
}
- OS << " flags: ";
-
- if (Flags & FrameSetup)
- OS << "FrameSetup";
-
- if (Flags & FrameDestroy)
- OS << "FrameDestroy";
}
if (!memoperands_empty()) {
- if (!HaveSemi) {
- OS << ";";
- HaveSemi = true;
+ SmallVector<StringRef, 0> SSNs;
+ const LLVMContext *Context = nullptr;
+ std::unique_ptr<LLVMContext> CtxPtr;
+ const MachineFrameInfo *MFI = nullptr;
+ if (const MachineFunction *MF = getMFIfAvailable(*this)) {
+ MFI = &MF->getFrameInfo();
+ Context = &MF->getFunction().getContext();
+ } else {
+ CtxPtr = llvm::make_unique<LLVMContext>();
+ Context = CtxPtr.get();
}
- OS << " mem:";
- for (mmo_iterator i = memoperands_begin(), e = memoperands_end();
- i != e; ++i) {
- (*i)->print(OS, MST);
- if (std::next(i) != e)
- OS << " ";
+ OS << " :: ";
+ bool NeedComma = false;
+ for (const MachineMemOperand *Op : memoperands()) {
+ if (NeedComma)
+ OS << ", ";
+ Op->print(OS, MST, SSNs, *Context, MFI, TII);
+ NeedComma = true;
}
}
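Memory operands now print after a " :: " separator as a comma-separated list, so a folded load might render along the lines of $eax = MOV32rm $rdi, 1, $noreg, 0, $noreg :: (load 4 from %ir.p); the concrete opcode and operands here are illustrative only.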
- // Print the regclass of any virtual registers encountered.
- if (MRI && !VirtRegs.empty()) {
+ if (SkipDebugLoc)
+ return;
+
+ bool HaveSemi = false;
+
+ // Print debug location information.
+ if (const DebugLoc &DL = getDebugLoc()) {
if (!HaveSemi) {
- OS << ";";
+ OS << ';';
HaveSemi = true;
}
- for (unsigned i = 0; i != VirtRegs.size(); ++i) {
- const RegClassOrRegBank &RC = MRI->getRegClassOrRegBank(VirtRegs[i]);
- if (!RC)
- continue;
- // Generic virtual registers do not have register classes.
- if (RC.is<const RegisterBank *>())
- OS << " " << RC.get<const RegisterBank *>()->getName();
- else
- OS << " "
- << TRI->getRegClassName(RC.get<const TargetRegisterClass *>());
- OS << ':' << printReg(VirtRegs[i]);
- for (unsigned j = i+1; j != VirtRegs.size();) {
- if (MRI->getRegClassOrRegBank(VirtRegs[j]) != RC) {
- ++j;
- continue;
- }
- if (VirtRegs[i] != VirtRegs[j])
- OS << "," << printReg(VirtRegs[j]);
- VirtRegs.erase(VirtRegs.begin()+j);
- }
- }
+ OS << ' ';
+ DL.print(OS);
}
- // Print debug location information.
+ // Print extra comments for DEBUG_VALUE.
if (isDebugValue() && getOperand(e - 2).isMetadata()) {
- if (!HaveSemi)
+ if (!HaveSemi) {
OS << ";";
+ HaveSemi = true;
+ }
auto *DV = cast<DILocalVariable>(getOperand(e - 2).getMetadata());
OS << " line no:" << DV->getLine();
if (auto *InlinedAt = debugLoc->getInlinedAt()) {
@@ -1489,16 +1551,11 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
}
if (isIndirectDebugValue())
OS << " indirect";
- } else if (SkipDebugLoc) {
- return;
- } else if (debugLoc && MF) {
- if (!HaveSemi)
- OS << ";";
- OS << " dbg:";
- debugLoc.print(OS);
}
+ // TODO: DBG_LABEL
- OS << '\n';
+ if (AddNewLine)
+ OS << '\n';
}
bool MachineInstr::addRegisterKilled(unsigned IncomingReg,
@@ -1737,33 +1794,55 @@ MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
+ auto MIB = BuildMI(MF, DL, MCID).addReg(Reg, RegState::Debug);
if (IsIndirect)
- return BuildMI(MF, DL, MCID)
- .addReg(Reg, RegState::Debug)
- .addImm(0U)
- .addMetadata(Variable)
- .addMetadata(Expr);
+ MIB.addImm(0U);
else
- return BuildMI(MF, DL, MCID)
- .addReg(Reg, RegState::Debug)
- .addReg(0U, RegState::Debug)
- .addMetadata(Variable)
- .addMetadata(Expr);
+ MIB.addReg(0U, RegState::Debug);
+ return MIB.addMetadata(Variable).addMetadata(Expr);
}
+MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
+ const MCInstrDesc &MCID, bool IsIndirect,
+ MachineOperand &MO, const MDNode *Variable,
+ const MDNode *Expr) {
+ assert(isa<DILocalVariable>(Variable) && "not a variable");
+ assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
+ assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ if (MO.isReg())
+ return BuildMI(MF, DL, MCID, IsIndirect, MO.getReg(), Variable, Expr);
+
+ auto MIB = BuildMI(MF, DL, MCID).add(MO);
+ if (IsIndirect)
+ MIB.addImm(0U);
+ else
+ MIB.addReg(0U, RegState::Debug);
+ return MIB.addMetadata(Variable).addMetadata(Expr);
+ }
+
MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, const MCInstrDesc &MCID,
bool IsIndirect, unsigned Reg,
const MDNode *Variable, const MDNode *Expr) {
- assert(isa<DILocalVariable>(Variable) && "not a variable");
- assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
MachineFunction &MF = *BB.getParent();
MachineInstr *MI = BuildMI(MF, DL, MCID, IsIndirect, Reg, Variable, Expr);
BB.insert(I, MI);
return MachineInstrBuilder(MF, MI);
}
+MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL, const MCInstrDesc &MCID,
+ bool IsIndirect, MachineOperand &MO,
+ const MDNode *Variable, const MDNode *Expr) {
+ MachineFunction &MF = *BB.getParent();
+ MachineInstr *MI = BuildMI(MF, DL, MCID, IsIndirect, MO, Variable, Expr);
+ BB.insert(I, MI);
+ return MachineInstrBuilder(MF, *MI);
+}
+
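A minimal sketch of the new MachineOperand-based overload, e.g. for describing a variable that lives in a stack slot; FI, Var, Expr, DL, MBB, InsertPt and TII are assumed to be in scope, and this is not code from the patch:

  MachineOperand FIOp = MachineOperand::CreateFI(FI);
  BuildMI(MBB, InsertPt, DL, TII->get(TargetOpcode::DBG_VALUE),
          /*IsIndirect=*/false, FIOp, Var, Expr);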
/// Compute the new DIExpression to use with a DBG_VALUE for a spill slot.
/// This prepends DW_OP_deref when spilling an indirect DBG_VALUE.
static const DIExpression *computeExprForSpill(const MachineInstr &MI) {
diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
index 75d449c7ac6f..7332b7162030 100644
--- a/contrib/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
@@ -71,6 +71,10 @@ SinkInstsToAvoidSpills("sink-insts-to-avoid-spills",
cl::desc("MachineLICM should sink instructions into "
"loops to avoid register spills"),
cl::init(false), cl::Hidden);
+static cl::opt<bool>
+HoistConstStores("hoist-const-stores",
+ cl::desc("Hoist invariant stores"),
+ cl::init(true), cl::Hidden);
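Since this is a cl::opt, the behaviour can be toggled from the tool command line when bisecting (for example -hoist-const-stores=false, assuming llc forwards backend options as usual); it defaults to enabled as shown above.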
STATISTIC(NumHoisted,
"Number of machine instructions hoisted out of loops");
@@ -82,17 +86,19 @@ STATISTIC(NumCSEed,
"Number of hoisted machine instructions CSEed");
STATISTIC(NumPostRAHoisted,
"Number of machine instructions hoisted out of loops post regalloc");
+STATISTIC(NumStoreConst,
+ "Number of stores of const phys reg hoisted out of loops");
namespace {
- class MachineLICM : public MachineFunctionPass {
+ class MachineLICMBase : public MachineFunctionPass {
const TargetInstrInfo *TII;
const TargetLoweringBase *TLI;
const TargetRegisterInfo *TRI;
const MachineFrameInfo *MFI;
MachineRegisterInfo *MRI;
TargetSchedModel SchedModel;
- bool PreRegAlloc = true;
+ bool PreRegAlloc;
// Various analyses that we use...
AliasAnalysis *AA; // Alias analysis info.
@@ -138,16 +144,8 @@ namespace {
unsigned SpeculationState;
public:
- static char ID; // Pass identification, replacement for typeid
-
- MachineLICM() : MachineFunctionPass(ID) {
- initializeMachineLICMPass(*PassRegistry::getPassRegistry());
- }
-
- explicit MachineLICM(bool PreRA)
- : MachineFunctionPass(ID), PreRegAlloc(PreRA) {
- initializeMachineLICMPass(*PassRegistry::getPassRegistry());
- }
+ MachineLICMBase(char &PassID, bool PreRegAlloc)
+ : MachineFunctionPass(PassID), PreRegAlloc(PreRegAlloc) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -218,7 +216,7 @@ namespace {
DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,
DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap);
- void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode);
+ void HoistOutOfLoop(MachineDomTreeNode *HeaderN);
void HoistRegion(MachineDomTreeNode *N, bool IsHeader);
@@ -252,11 +250,29 @@ namespace {
MachineBasicBlock *getCurPreheader();
};
+ class MachineLICM : public MachineLICMBase {
+ public:
+ static char ID;
+ MachineLICM() : MachineLICMBase(ID, false) {
+ initializeMachineLICMPass(*PassRegistry::getPassRegistry());
+ }
+ };
+
+ class EarlyMachineLICM : public MachineLICMBase {
+ public:
+ static char ID;
+ EarlyMachineLICM() : MachineLICMBase(ID, true) {
+ initializeEarlyMachineLICMPass(*PassRegistry::getPassRegistry());
+ }
+ };
+
} // end anonymous namespace
-char MachineLICM::ID = 0;
+char MachineLICM::ID;
+char EarlyMachineLICM::ID;
char &llvm::MachineLICMID = MachineLICM::ID;
+char &llvm::EarlyMachineLICMID = EarlyMachineLICM::ID;
INITIALIZE_PASS_BEGIN(MachineLICM, DEBUG_TYPE,
"Machine Loop Invariant Code Motion", false, false)
@@ -266,6 +282,14 @@ INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineLICM, DEBUG_TYPE,
"Machine Loop Invariant Code Motion", false, false)
+INITIALIZE_PASS_BEGIN(EarlyMachineLICM, "early-machinelicm",
+ "Early Machine Loop Invariant Code Motion", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(EarlyMachineLICM, "early-machinelicm",
+ "Early Machine Loop Invariant Code Motion", false, false)
+
/// Test if the given loop is the outer-most loop that has a unique predecessor.
static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
// Check whether this loop even has a unique predecessor.
@@ -279,7 +303,7 @@ static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
return true;
}
-bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
+bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
@@ -290,15 +314,15 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
TRI = ST.getRegisterInfo();
MFI = &MF.getFrameInfo();
MRI = &MF.getRegInfo();
- SchedModel.init(ST.getSchedModel(), &ST, TII);
+ SchedModel.init(&ST);
PreRegAlloc = MRI->isSSA();
if (PreRegAlloc)
- DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
+ LLVM_DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
else
- DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
- DEBUG(dbgs() << MF.getName() << " ********\n");
+ LLVM_DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
+ LLVM_DEBUG(dbgs() << MF.getName() << " ********\n");
if (PreRegAlloc) {
// Estimate register pressure during pre-regalloc pass.
@@ -350,6 +374,10 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
/// Return true if instruction stores to the specified frame.
static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
+ // Check mayStore before the memory operands so that instructions with no
+ // memory operands, e.g. DBG_VALUEs, are not conservatively treated as stores.
+ if (!MI->mayStore())
+ return false;
// If we lost memory operands, conservatively assume that the instruction
// writes to all slots.
if (MI->memoperands_empty())
@@ -368,11 +396,11 @@ static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
/// Examine the instruction as a potential LICM candidate. Also
/// gather register def and frame object update information.
-void MachineLICM::ProcessMI(MachineInstr *MI,
- BitVector &PhysRegDefs,
- BitVector &PhysRegClobbers,
- SmallSet<int, 32> &StoredFIs,
- SmallVectorImpl<CandidateInfo> &Candidates) {
+void MachineLICMBase::ProcessMI(MachineInstr *MI,
+ BitVector &PhysRegDefs,
+ BitVector &PhysRegClobbers,
+ SmallSet<int, 32> &StoredFIs,
+ SmallVectorImpl<CandidateInfo> &Candidates) {
bool RuledOut = false;
bool HasNonInvariantUse = false;
unsigned Def = 0;
@@ -455,7 +483,7 @@ void MachineLICM::ProcessMI(MachineInstr *MI,
/// Walk the specified region of the CFG and hoist loop invariants out to the
/// preheader.
-void MachineLICM::HoistRegionPostRA() {
+void MachineLICMBase::HoistRegionPostRA() {
MachineBasicBlock *Preheader = getCurPreheader();
if (!Preheader)
return;
@@ -541,7 +569,7 @@ void MachineLICM::HoistRegionPostRA() {
/// Add register 'Reg' to the livein sets of BBs in the current loop, and make
/// sure it is not killed by any instructions in the loop.
-void MachineLICM::AddToLiveIns(unsigned Reg) {
+void MachineLICMBase::AddToLiveIns(unsigned Reg) {
const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
for (MachineBasicBlock *BB : Blocks) {
if (!BB->isLiveIn(Reg))
@@ -558,13 +586,14 @@ void MachineLICM::AddToLiveIns(unsigned Reg) {
/// When an instruction is found to only use loop invariant operands that is
/// safe to hoist, this instruction is called to do the dirty work.
-void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
+void MachineLICMBase::HoistPostRA(MachineInstr *MI, unsigned Def) {
MachineBasicBlock *Preheader = getCurPreheader();
// Now move the instructions to the predecessor, inserting it before any
// terminator instructions.
- DEBUG(dbgs() << "Hoisting to " << printMBBReference(*Preheader) << " from "
- << printMBBReference(*MI->getParent()) << ": " << *MI);
+ LLVM_DEBUG(dbgs() << "Hoisting to " << printMBBReference(*Preheader)
+ << " from " << printMBBReference(*MI->getParent()) << ": "
+ << *MI);
// Splice the instruction to the preheader.
MachineBasicBlock *MBB = MI->getParent();
@@ -581,7 +610,7 @@ void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
/// Check if this mbb is guaranteed to execute. If not then a load from this mbb
/// may not be safe to hoist.
-bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) {
+bool MachineLICMBase::IsGuaranteedToExecute(MachineBasicBlock *BB) {
if (SpeculationState != SpeculateUnknown)
return SpeculationState == SpeculateFalse;
@@ -600,24 +629,24 @@ bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) {
return true;
}
-void MachineLICM::EnterScope(MachineBasicBlock *MBB) {
- DEBUG(dbgs() << "Entering " << printMBBReference(*MBB) << '\n');
+void MachineLICMBase::EnterScope(MachineBasicBlock *MBB) {
+ LLVM_DEBUG(dbgs() << "Entering " << printMBBReference(*MBB) << '\n');
// Remember livein register pressure.
BackTrace.push_back(RegPressure);
}
-void MachineLICM::ExitScope(MachineBasicBlock *MBB) {
- DEBUG(dbgs() << "Exiting " << printMBBReference(*MBB) << '\n');
+void MachineLICMBase::ExitScope(MachineBasicBlock *MBB) {
+ LLVM_DEBUG(dbgs() << "Exiting " << printMBBReference(*MBB) << '\n');
BackTrace.pop_back();
}
/// Destroy scope for the MBB that corresponds to the given dominator tree node
/// if it's a leaf or all of its children are done. Walk up the dominator tree to
/// destroy ancestors which are now done.
-void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node,
- DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
- DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
+void MachineLICMBase::ExitScopeIfDone(MachineDomTreeNode *Node,
+ DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
+ DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
if (OpenChildren[Node])
return;
@@ -638,7 +667,7 @@ void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node,
/// specified header block, and that are in the current loop) in depth first
/// order w.r.t the DominatorTree. This allows us to visit definitions before
/// uses, allowing us to hoist a loop body in one pass without iteration.
-void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
+void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
MachineBasicBlock *Preheader = getCurPreheader();
if (!Preheader)
return;
@@ -708,6 +737,8 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
MachineInstr *MI = &*MII;
if (!Hoist(MI, Preheader))
UpdateRegPressure(MI);
+ // If we have hoisted an instruction that may store, it can only be a
+ // constant store.
MII = NextMII;
}
@@ -719,7 +750,7 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
/// Sink instructions into loops if profitable. This especially tries to prevent
/// register spills caused by register pressure if there is little to no
/// overhead moving instructions into loops.
-void MachineLICM::SinkIntoLoop() {
+void MachineLICMBase::SinkIntoLoop() {
MachineBasicBlock *Preheader = getCurPreheader();
if (!Preheader)
return;
@@ -773,7 +804,7 @@ static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
/// Find all virtual register references that are liveout of the preheader to
/// initialize the starting "register pressure". Note this does not count live
/// through (livein but not used) registers.
-void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
+void MachineLICMBase::InitRegPressure(MachineBasicBlock *BB) {
std::fill(RegPressure.begin(), RegPressure.end(), 0);
// If the preheader has only a single predecessor and it ends with a
@@ -792,8 +823,8 @@ void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
}
/// Update estimate of register pressure after the specified instruction.
-void MachineLICM::UpdateRegPressure(const MachineInstr *MI,
- bool ConsiderUnseenAsDef) {
+void MachineLICMBase::UpdateRegPressure(const MachineInstr *MI,
+ bool ConsiderUnseenAsDef) {
auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef);
for (const auto &RPIdAndCost : Cost) {
unsigned Class = RPIdAndCost.first;
@@ -811,8 +842,8 @@ void MachineLICM::UpdateRegPressure(const MachineInstr *MI,
/// figure out which usages are live-ins.
/// FIXME: Figure out a way to consider 'RegSeen' from all code paths.
DenseMap<unsigned, int>
-MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
- bool ConsiderUnseenAsDef) {
+MachineLICMBase::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
+ bool ConsiderUnseenAsDef) {
DenseMap<unsigned, int> Cost;
if (MI->isImplicitDef())
return Cost;
@@ -871,13 +902,86 @@ static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
return false;
}
+// This function iterates through all the operands of the input store MI and
+// checks that each register operand satisfies isCallerPreservedPhysReg.
+// This means the value being stored and the address it is being stored to
+// are constant throughout the body of the function (not including the
+// prologue and epilogue). When called with an MI that isn't a store, it
+// returns false.
+// A future improvement would be to check that the store registers are
+// constant throughout the loop rather than throughout the function.
+static bool isInvariantStore(const MachineInstr &MI,
+ const TargetRegisterInfo *TRI,
+ const MachineRegisterInfo *MRI) {
+
+ bool FoundCallerPresReg = false;
+ if (!MI.mayStore() || MI.hasUnmodeledSideEffects() ||
+ (MI.getNumOperands() == 0))
+ return false;
+
+ // Check that all register operands are caller-preserved physical registers.
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ // If operand is a virtual register, check if it comes from a copy of a
+ // physical register.
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ Reg = TRI->lookThruCopyLike(MO.getReg(), MRI);
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return false;
+ if (!TRI->isCallerPreservedPhysReg(Reg, *MI.getMF()))
+ return false;
+ else
+ FoundCallerPresReg = true;
+ } else if (!MO.isImm()) {
+ return false;
+ }
+ }
+ return FoundCallerPresReg;
+}
+
+// Return true if the input MI is a copy instruction that feeds an invariant
+// store instruction. This means that the source of the copy has to satisfy
+// isCallerPreservedPhysReg and at least one of its users must satisfy
+// isInvariantStore.
+static bool isCopyFeedingInvariantStore(const MachineInstr &MI,
+ const MachineRegisterInfo *MRI,
+ const TargetRegisterInfo *TRI) {
+
+ // FIXME: If targets would like to look through instructions that aren't
+ // pure copies, this can be updated to a query.
+ if (!MI.isCopy())
+ return false;
+
+ const MachineFunction *MF = MI.getMF();
+ // Check that we are copying a constant physical register.
+ unsigned CopySrcReg = MI.getOperand(1).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(CopySrcReg))
+ return false;
+
+ if (!TRI->isCallerPreservedPhysReg(CopySrcReg, *MF))
+ return false;
+
+ unsigned CopyDstReg = MI.getOperand(0).getReg();
+ // Check if any of the uses of the copy are invariant stores.
+ assert(TargetRegisterInfo::isVirtualRegister(CopyDstReg) &&
+ "copy dst is not a virtual reg");
+
+ for (MachineInstr &UseMI : MRI->use_instructions(CopyDstReg)) {
+ if (UseMI.mayStore() && isInvariantStore(UseMI, TRI, MRI))
+ return true;
+ }
+ return false;
+}
+
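Concretely, the pattern this recognizes looks roughly like the following, written in the same informal notation as the comments above (register names are illustrative; %physreg must be caller-preserved):

  %0 = COPY %physreg      // %physreg is caller-preserved, constant in the body
  ...
  STORE %0, <address formed from caller-preserved registers or immediates>

Both the COPY and the store then become hoistable even though the store itself fails isSafeToMove.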
/// Returns true if the instruction may be a suitable candidate for LICM.
/// e.g. If the instruction is a call, then it's obviously not safe to hoist it.
-bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
+bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) {
// Check if it's safe to move the instruction.
bool DontMoveAcrossStore = true;
- if (!I.isSafeToMove(AA, DontMoveAcrossStore))
+ if ((!I.isSafeToMove(AA, DontMoveAcrossStore)) &&
+ !(HoistConstStores && isInvariantStore(I, TRI, MRI))) {
return false;
+ }
// If it is load then check if it is guaranteed to execute by making sure that
// it dominates all exiting blocks. If it doesn't, then there is a path out of
@@ -896,7 +1000,7 @@ bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
/// I.e., all virtual register operands are defined outside of the loop,
/// physical registers aren't accessed explicitly, and there are no side
/// effects that aren't captured by the operands or other flags.
-bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
+bool MachineLICMBase::IsLoopInvariantInst(MachineInstr &I) {
if (!IsLICMCandidate(I))
return false;
@@ -949,7 +1053,7 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
/// Return true if the specified instruction is used by a phi node and hoisting
/// it could cause a copy to be inserted.
-bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const {
+bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI) const {
SmallVector<const MachineInstr*, 8> Work(1, MI);
do {
MI = Work.pop_back_val();
@@ -984,8 +1088,9 @@ bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const {
/// Compute operand latency between a def of 'Reg' and a use in the current
/// loop; return true if the target considers it high.
-bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
- unsigned DefIdx, unsigned Reg) const {
+bool MachineLICMBase::HasHighOperandLatency(MachineInstr &MI,
+ unsigned DefIdx,
+ unsigned Reg) const {
if (MRI->use_nodbg_empty(Reg))
return false;
@@ -1015,7 +1120,7 @@ bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
/// Return true if the instruction is marked "cheap" or the operand latency
/// between its def and a use is one or less.
-bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
+bool MachineLICMBase::IsCheapInstruction(MachineInstr &MI) const {
if (TII->isAsCheapAsAMove(MI) || MI.isCopyLike())
return true;
@@ -1040,8 +1145,9 @@ bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
/// Visit BBs from header to current BB, check if hoisting an instruction of the
/// given cost matrix can cause high register pressure.
-bool MachineLICM::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost,
- bool CheapInstr) {
+bool
+MachineLICMBase::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost,
+ bool CheapInstr) {
for (const auto &RPIdAndCost : Cost) {
if (RPIdAndCost.second <= 0)
continue;
@@ -1065,7 +1171,7 @@ bool MachineLICM::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost,
/// Traverse the back trace from header to the current block and update their
/// register pressures to reflect the effect of hoisting MI from the current
/// block to the preheader.
-void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) {
+void MachineLICMBase::UpdateBackTraceRegPressure(const MachineInstr *MI) {
// First compute the 'cost' of the instruction, i.e. its contribution
// to register pressure.
auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/false,
@@ -1079,7 +1185,7 @@ void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) {
/// Return true if it is potentially profitable to hoist the given loop
/// invariant.
-bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
+bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
if (MI.isImplicitDef())
return true;
@@ -1095,12 +1201,15 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
// - When hoisting the last use of a value in the loop, that value no longer
// needs to be live in the loop. This lowers register pressure in the loop.
+ if (HoistConstStores && isCopyFeedingInvariantStore(MI, MRI, TRI))
+ return true;
+
bool CheapInstr = IsCheapInstruction(MI);
bool CreatesCopy = HasLoopPHIUse(&MI);
// Don't hoist a cheap instruction if it would create a copy in the loop.
if (CheapInstr && CreatesCopy) {
- DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);
+ LLVM_DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);
return false;
}
@@ -1119,7 +1228,7 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
if (MO.isDef() && HasHighOperandLatency(MI, i, Reg)) {
- DEBUG(dbgs() << "Hoist High Latency: " << MI);
+ LLVM_DEBUG(dbgs() << "Hoist High Latency: " << MI);
++NumHighLatency;
return true;
}
@@ -1137,14 +1246,14 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
// Visit BBs from header to current BB, if hoisting this doesn't cause
// high register pressure, then it's safe to proceed.
if (!CanCauseHighRegPressure(Cost, CheapInstr)) {
- DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI);
+ LLVM_DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI);
++NumLowRP;
return true;
}
// Don't risk increasing register pressure if it would create copies.
if (CreatesCopy) {
- DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);
+ LLVM_DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);
return false;
}
@@ -1153,7 +1262,7 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
// conservative.
if (AvoidSpeculation &&
(!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) {
- DEBUG(dbgs() << "Won't speculate: " << MI);
+ LLVM_DEBUG(dbgs() << "Won't speculate: " << MI);
return false;
}
@@ -1161,7 +1270,7 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
// to be remat'ed.
if (!TII->isTriviallyReMaterializable(MI, AA) &&
!MI.isDereferenceableInvariantLoad(AA)) {
- DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
+ LLVM_DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
return false;
}
@@ -1171,7 +1280,7 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
/// Unfold a load from the given machineinstr if the load itself could be
/// hoisted. Return the unfolded and hoistable load, or null if the load
/// couldn't be unfolded or if it wouldn't be hoistable.
-MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
+MachineInstr *MachineLICMBase::ExtractHoistableLoad(MachineInstr *MI) {
// Don't unfold simple loads.
if (MI->canFoldAsLoad())
return nullptr;
@@ -1229,7 +1338,7 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
/// Initialize the CSE map with instructions that are in the current loop
/// preheader that may become duplicates of instructions that are hoisted
/// out of the loop.
-void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
+void MachineLICMBase::InitCSEMap(MachineBasicBlock *BB) {
for (MachineInstr &MI : *BB)
CSEMap[MI.getOpcode()].push_back(&MI);
}
@@ -1237,8 +1346,8 @@ void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
/// Find an instruction among PrevMIs that is a duplicate of MI.
/// Return this instruction if it's found.
const MachineInstr*
-MachineLICM::LookForDuplicate(const MachineInstr *MI,
- std::vector<const MachineInstr*> &PrevMIs) {
+MachineLICMBase::LookForDuplicate(const MachineInstr *MI,
+ std::vector<const MachineInstr*> &PrevMIs) {
for (const MachineInstr *PrevMI : PrevMIs)
if (TII->produceSameValue(*MI, *PrevMI, (PreRegAlloc ? MRI : nullptr)))
return PrevMI;
@@ -1250,15 +1359,15 @@ MachineLICM::LookForDuplicate(const MachineInstr *MI,
/// computes the same value. If it's found, do a RAU with the definition of
/// the existing instruction rather than hoisting the instruction to the
/// preheader.
-bool MachineLICM::EliminateCSE(MachineInstr *MI,
- DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator &CI) {
+bool MachineLICMBase::EliminateCSE(MachineInstr *MI,
+ DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator &CI) {
// Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
// the undef property onto uses.
if (CI == CSEMap.end() || MI->isImplicitDef())
return false;
if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
- DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);
+ LLVM_DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);
// Replace virtual registers defined by MI by their counterparts defined
// by Dup.
@@ -1308,7 +1417,7 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI,
/// Return true if the given instruction will be CSE'd if it's hoisted out of
/// the loop.
-bool MachineLICM::MayCSE(MachineInstr *MI) {
+bool MachineLICMBase::MayCSE(MachineInstr *MI) {
unsigned Opcode = MI->getOpcode();
DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator
CI = CSEMap.find(Opcode);
@@ -1323,7 +1432,7 @@ bool MachineLICM::MayCSE(MachineInstr *MI) {
/// When an instruction is found to use only loop invariant operands
/// that are safe to hoist, this instruction is called to do the dirty work.
/// It returns true if the instruction is hoisted.
-bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
+bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
// First check whether we should hoist this instruction.
if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
// If not, try unfolding a hoistable load.
@@ -1331,16 +1440,21 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
if (!MI) return false;
}
+ // If we have hoisted an instruction that may store, it can only be a constant
+ // store.
+ if (MI->mayStore())
+ NumStoreConst++;
+
// Now move the instructions to the predecessor, inserting it before any
// terminator instructions.
- DEBUG({
- dbgs() << "Hoisting " << *MI;
- if (MI->getParent()->getBasicBlock())
- dbgs() << " from " << printMBBReference(*MI->getParent());
- if (Preheader->getBasicBlock())
- dbgs() << " to " << printMBBReference(*Preheader);
- dbgs() << "\n";
- });
+ LLVM_DEBUG({
+ dbgs() << "Hoisting " << *MI;
+ if (MI->getParent()->getBasicBlock())
+ dbgs() << " from " << printMBBReference(*MI->getParent());
+ if (Preheader->getBasicBlock())
+ dbgs() << " to " << printMBBReference(*Preheader);
+ dbgs() << "\n";
+ });
// If this is the first instruction being hoisted to the preheader,
// initialize the CSE map with potential common expressions.
@@ -1386,7 +1500,7 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
}
/// Get the preheader for the current loop, splitting a critical edge if needed.
-MachineBasicBlock *MachineLICM::getCurPreheader() {
+MachineBasicBlock *MachineLICMBase::getCurPreheader() {
// Determine the block to which to hoist instructions. If we can't find a
// suitable loop predecessor, we can't do any hoisting.
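Taken together, the MachineLICM changes above say that under high register pressure an instruction is only worth hoisting if it is trivially rematerializable or a dereferenceable invariant load, and that any hoisted instruction that may store can only be a constant store (which is what the NumStoreConst bump records). A compact sketch of that decision, with the predicates reduced to hypothetical booleans rather than the real TII/MI queries:

// Hypothetical summary of a hoisting candidate's properties; in the pass
// these come from TargetInstrInfo and MachineInstr queries.
struct HoistCandidate {
  bool HighRegPressure;             // hoisting would increase pressure
  bool TriviallyRematerializable;
  bool DereferenceableInvariantLoad;
  bool MayStore;                    // only constant stores reach this point
};

static bool isProfitableToHoist(const HoistCandidate &C) {
  // Under high pressure, only values that are cheap to recompute (or reload)
  // are worth hoisting.
  if (C.HighRegPressure && !C.TriviallyRematerializable &&
      !C.DereferenceableInvariantLoad)
    return false;
  return true;
}

static unsigned NumStoreConstHoisted = 0;

static void noteHoisted(const HoistCandidate &C) {
  // Mirrors the NumStoreConst statistic: a hoisted instruction that may
  // store can only be a constant store, so count it as such.
  if (C.MayStore)
    ++NumStoreConstHoisted;
}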
diff --git a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
index a9aa1d954e70..2bce59235057 100644
--- a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -18,6 +18,7 @@
#include "llvm/Analysis/LoopInfoImpl.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
index 8f0b89657d02..054cc97f8374 100644
--- a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -15,7 +15,6 @@
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetLoweringObjectFile.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
@@ -27,6 +26,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
diff --git a/contrib/llvm/lib/CodeGen/MachineOperand.cpp b/contrib/llvm/lib/CodeGen/MachineOperand.cpp
index ec81c6391171..8098333832b4 100644
--- a/contrib/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineOperand.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/CodeGen/MIRPrinter.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -19,6 +20,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/ModuleSlotTracker.h"
@@ -50,6 +52,9 @@ void MachineOperand::setReg(unsigned Reg) {
if (getReg() == Reg)
return; // No change.
+ // Clear the IsRenamable bit to keep it conservatively correct.
+ IsRenamable = false;
+
// Otherwise, we have to change the register. If this operand is embedded
// into a machine function, we need to update the old and new register's
// use/def lists.
@@ -110,30 +115,27 @@ bool MachineOperand::isRenamable() const {
assert(isReg() && "Wrong MachineOperand accessor");
assert(TargetRegisterInfo::isPhysicalRegister(getReg()) &&
"isRenamable should only be checked on physical registers");
- return IsRenamable;
+ if (!IsRenamable)
+ return false;
+
+ const MachineInstr *MI = getParent();
+ if (!MI)
+ return true;
+
+ if (isDef())
+ return !MI->hasExtraDefRegAllocReq(MachineInstr::IgnoreBundle);
+
+ assert(isUse() && "Reg is not def or use");
+ return !MI->hasExtraSrcRegAllocReq(MachineInstr::IgnoreBundle);
}
void MachineOperand::setIsRenamable(bool Val) {
assert(isReg() && "Wrong MachineOperand accessor");
assert(TargetRegisterInfo::isPhysicalRegister(getReg()) &&
"setIsRenamable should only be called on physical registers");
- if (const MachineInstr *MI = getParent())
- if ((isDef() && MI->hasExtraDefRegAllocReq()) ||
- (isUse() && MI->hasExtraSrcRegAllocReq()))
- assert(!Val && "isRenamable should be false for "
- "hasExtraDefRegAllocReq/hasExtraSrcRegAllocReq opcodes");
IsRenamable = Val;
}
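The rewritten isRenamable() above keeps the stored bit but conservatively reports false whenever the parent instruction has extra register-allocation requirements on its defs or uses (the real code queries MachineInstr::hasExtraDefRegAllocReq / hasExtraSrcRegAllocReq). A standalone sketch of that decision over hypothetical operand and instruction records:

// Hypothetical reduction of the fields isRenamable() consults.
struct ParentInstr {
  bool HasExtraDefRegAllocReq;
  bool HasExtraSrcRegAllocReq;
};

struct RegOperand {
  bool IsRenamableBit;        // the stored IsRenamable flag
  bool IsDef;                 // otherwise the operand is a use
  const ParentInstr *Parent;  // may be null for a detached operand
};

static bool isRenamable(const RegOperand &Op) {
  if (!Op.IsRenamableBit)
    return false;
  // A detached operand can only rely on the stored bit.
  if (!Op.Parent)
    return true;
  // Defs and uses are constrained by different per-opcode flags.
  if (Op.IsDef)
    return !Op.Parent->HasExtraDefRegAllocReq;
  return !Op.Parent->HasExtraSrcRegAllocReq;
}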
-void MachineOperand::setIsRenamableIfNoExtraRegAllocReq() {
- if (const MachineInstr *MI = getParent())
- if ((isDef() && MI->hasExtraDefRegAllocReq()) ||
- (isUse() && MI->hasExtraSrcRegAllocReq()))
- return;
-
- setIsRenamable(true);
-}
-
// If this operand is currently a register operand, and if this is in a
// function, deregister the operand from the register's use/def list.
void MachineOperand::removeRegFromUses() {
@@ -440,7 +442,70 @@ static void printIRBlockReference(raw_ostream &OS, const BasicBlock &BB,
OS << "<unknown>";
}
-void MachineOperand::printSubregIdx(raw_ostream &OS, uint64_t Index,
+static void printIRValueReference(raw_ostream &OS, const Value &V,
+ ModuleSlotTracker &MST) {
+ if (isa<GlobalValue>(V)) {
+ V.printAsOperand(OS, /*PrintType=*/false, MST);
+ return;
+ }
+ if (isa<Constant>(V)) {
+ // Machine memory operands can load/store to/from constant value pointers.
+ OS << '`';
+ V.printAsOperand(OS, /*PrintType=*/true, MST);
+ OS << '`';
+ return;
+ }
+ OS << "%ir.";
+ if (V.hasName()) {
+ printLLVMNameWithoutPrefix(OS, V.getName());
+ return;
+ }
+ MachineOperand::printIRSlotNumber(OS, MST.getLocalSlot(&V));
+}
+
+static void printSyncScope(raw_ostream &OS, const LLVMContext &Context,
+ SyncScope::ID SSID,
+ SmallVectorImpl<StringRef> &SSNs) {
+ switch (SSID) {
+ case SyncScope::System:
+ break;
+ default:
+ if (SSNs.empty())
+ Context.getSyncScopeNames(SSNs);
+
+ OS << "syncscope(\"";
+ printEscapedString(SSNs[SSID], OS);
+ OS << "\") ";
+ break;
+ }
+}
+
+static const char *getTargetMMOFlagName(const TargetInstrInfo &TII,
+ unsigned TMMOFlag) {
+ auto Flags = TII.getSerializableMachineMemOperandTargetFlags();
+ for (const auto &I : Flags) {
+ if (I.first == TMMOFlag) {
+ return I.second;
+ }
+ }
+ return nullptr;
+}
+
+static void printFrameIndex(raw_ostream& OS, int FrameIndex, bool IsFixed,
+ const MachineFrameInfo *MFI) {
+ StringRef Name;
+ if (MFI) {
+ IsFixed = MFI->isFixedObjectIndex(FrameIndex);
+ if (const AllocaInst *Alloca = MFI->getObjectAllocation(FrameIndex))
+ if (Alloca->hasName())
+ Name = Alloca->getName();
+ if (IsFixed)
+ FrameIndex -= MFI->getObjectIndexBegin();
+ }
+ MachineOperand::printStackObjectReference(OS, FrameIndex, IsFixed, Name);
+}
+
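getTargetMMOFlagName above is a linear scan over the (flag, name) pairs the target exposes for MIR serialization, returning null when the flag has no registered name. The same pattern, written against a hypothetical flag table rather than TargetInstrInfo::getSerializableMachineMemOperandTargetFlags:

#include <utility>
#include <vector>

// Hypothetical table of target-specific memory-operand flags and the names
// used to print them in MIR.
using MMOFlagTable = std::vector<std::pair<unsigned, const char *>>;

static const char *getFlagName(const MMOFlagTable &Flags, unsigned Flag) {
  for (const auto &Entry : Flags)
    if (Entry.first == Flag)
      return Entry.second;
  // No name registered for this flag; callers must handle null.
  return nullptr;
}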
+void MachineOperand::printSubRegIdx(raw_ostream &OS, uint64_t Index,
const TargetRegisterInfo *TRI) {
OS << "%subreg.";
if (TRI)
@@ -639,15 +704,21 @@ static void printCFI(raw_ostream &OS, const MCCFIInstruction &CFI,
void MachineOperand::print(raw_ostream &OS, const TargetRegisterInfo *TRI,
const TargetIntrinsicInfo *IntrinsicInfo) const {
+ print(OS, LLT{}, TRI, IntrinsicInfo);
+}
+
+void MachineOperand::print(raw_ostream &OS, LLT TypeToPrint,
+ const TargetRegisterInfo *TRI,
+ const TargetIntrinsicInfo *IntrinsicInfo) const {
tryToGetTargetInfo(*this, TRI, IntrinsicInfo);
ModuleSlotTracker DummyMST(nullptr);
- print(OS, DummyMST, LLT{}, /*PrintDef=*/false,
+ print(OS, DummyMST, TypeToPrint, /*PrintDef=*/false, /*IsStandalone=*/true,
/*ShouldPrintRegisterTies=*/true,
/*TiedOperandIdx=*/0, TRI, IntrinsicInfo);
}
void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
- LLT TypeToPrint, bool PrintDef,
+ LLT TypeToPrint, bool PrintDef, bool IsStandalone,
bool ShouldPrintRegisterTies,
unsigned TiedOperandIdx,
const TargetRegisterInfo *TRI,
@@ -675,7 +746,15 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << "debug-use ";
if (TargetRegisterInfo::isPhysicalRegister(getReg()) && isRenamable())
OS << "renamable ";
- OS << printReg(Reg, TRI);
+
+ const MachineRegisterInfo *MRI = nullptr;
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (const MachineFunction *MF = getMFIfAvailable(*this)) {
+ MRI = &MF->getRegInfo();
+ }
+ }
+
+ OS << printReg(Reg, TRI, 0, MRI);
// Print the sub register.
if (unsigned SubReg = getSubReg()) {
if (TRI)
@@ -687,7 +766,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
if (const MachineFunction *MF = getMFIfAvailable(*this)) {
const MachineRegisterInfo &MRI = MF->getRegInfo();
- if (!PrintDef || MRI.def_empty(Reg)) {
+ if (IsStandalone || !PrintDef || MRI.def_empty(Reg)) {
OS << ':';
OS << printRegClassOrBank(Reg, MRI, TRI);
}
@@ -716,17 +795,10 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
case MachineOperand::MO_FrameIndex: {
int FrameIndex = getIndex();
bool IsFixed = false;
- StringRef Name;
- if (const MachineFunction *MF = getMFIfAvailable(*this)) {
- const MachineFrameInfo &MFI = MF->getFrameInfo();
- IsFixed = MFI.isFixedObjectIndex(FrameIndex);
- if (const AllocaInst *Alloca = MFI.getObjectAllocation(FrameIndex))
- if (Alloca->hasName())
- Name = Alloca->getName();
- if (IsFixed)
- FrameIndex -= MFI.getObjectIndexBegin();
- }
- printStackObjectReference(OS, FrameIndex, IsFixed, Name);
+ const MachineFrameInfo *MFI = nullptr;
+ if (const MachineFunction *MF = getMFIfAvailable(*this))
+ MFI = &MF->getFrameInfo();
+ printFrameIndex(OS, FrameIndex, IsFixed, MFI);
break;
}
case MachineOperand::MO_ConstantPoolIndex:
@@ -752,7 +824,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
break;
case MachineOperand::MO_ExternalSymbol: {
StringRef Name = getSymbolName();
- OS << '$';
+ OS << '&';
if (Name.empty()) {
OS << "\"\"";
} else {
@@ -905,7 +977,7 @@ MachinePointerInfo MachinePointerInfo::getUnknownStack(MachineFunction &MF) {
}
MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f,
- uint64_t s, unsigned int a,
+ uint64_t s, uint64_t a,
const AAMDNodes &AAInfo,
const MDNode *Ranges, SyncScope::ID SSID,
AtomicOrdering Ordering,
@@ -961,108 +1033,121 @@ void MachineMemOperand::print(raw_ostream &OS) const {
ModuleSlotTracker DummyMST(nullptr);
print(OS, DummyMST);
}
+
void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const {
- assert((isLoad() || isStore()) && "SV has to be a load, store or both.");
+ SmallVector<StringRef, 0> SSNs;
+ LLVMContext Ctx;
+ print(OS, MST, SSNs, Ctx, nullptr, nullptr);
+}
+void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
+ SmallVectorImpl<StringRef> &SSNs,
+ const LLVMContext &Context,
+ const MachineFrameInfo *MFI,
+ const TargetInstrInfo *TII) const {
+ OS << '(';
if (isVolatile())
- OS << "Volatile ";
-
+ OS << "volatile ";
+ if (isNonTemporal())
+ OS << "non-temporal ";
+ if (isDereferenceable())
+ OS << "dereferenceable ";
+ if (isInvariant())
+ OS << "invariant ";
+ if (getFlags() & MachineMemOperand::MOTargetFlag1)
+ OS << '"' << getTargetMMOFlagName(*TII, MachineMemOperand::MOTargetFlag1)
+ << "\" ";
+ if (getFlags() & MachineMemOperand::MOTargetFlag2)
+ OS << '"' << getTargetMMOFlagName(*TII, MachineMemOperand::MOTargetFlag2)
+ << "\" ";
+ if (getFlags() & MachineMemOperand::MOTargetFlag3)
+ OS << '"' << getTargetMMOFlagName(*TII, MachineMemOperand::MOTargetFlag3)
+ << "\" ";
+
+ assert((isLoad() || isStore()) &&
+ "machine memory operand must be a load or store (or both)");
if (isLoad())
- OS << "LD";
+ OS << "load ";
if (isStore())
- OS << "ST";
- OS << getSize();
+ OS << "store ";
- // Print the address information.
- OS << "[";
- if (const Value *V = getValue())
- V->printAsOperand(OS, /*PrintType=*/false, MST);
- else if (const PseudoSourceValue *PSV = getPseudoValue())
- PSV->printCustom(OS);
- else
- OS << "<unknown>";
+ printSyncScope(OS, Context, getSyncScopeID(), SSNs);
- unsigned AS = getAddrSpace();
- if (AS != 0)
- OS << "(addrspace=" << AS << ')';
-
- // If the alignment of the memory reference itself differs from the alignment
- // of the base pointer, print the base alignment explicitly, next to the base
- // pointer.
- if (getBaseAlignment() != getAlignment())
- OS << "(align=" << getBaseAlignment() << ")";
-
- if (getOffset() != 0)
- OS << "+" << getOffset();
- OS << "]";
-
- // Print the alignment of the reference.
- if (getBaseAlignment() != getAlignment() || getBaseAlignment() != getSize())
- OS << "(align=" << getAlignment() << ")";
-
- // Print TBAA info.
- if (const MDNode *TBAAInfo = getAAInfo().TBAA) {
- OS << "(tbaa=";
- if (TBAAInfo->getNumOperands() > 0)
- TBAAInfo->getOperand(0)->printAsOperand(OS, MST);
- else
- OS << "<unknown>";
- OS << ")";
- }
+ if (getOrdering() != AtomicOrdering::NotAtomic)
+ OS << toIRString(getOrdering()) << ' ';
+ if (getFailureOrdering() != AtomicOrdering::NotAtomic)
+ OS << toIRString(getFailureOrdering()) << ' ';
- // Print AA scope info.
- if (const MDNode *ScopeInfo = getAAInfo().Scope) {
- OS << "(alias.scope=";
- if (ScopeInfo->getNumOperands() > 0)
- for (unsigned i = 0, ie = ScopeInfo->getNumOperands(); i != ie; ++i) {
- ScopeInfo->getOperand(i)->printAsOperand(OS, MST);
- if (i != ie - 1)
- OS << ",";
- }
- else
- OS << "<unknown>";
- OS << ")";
+ OS << getSize();
+ if (const Value *Val = getValue()) {
+ OS << ((isLoad() && isStore()) ? " on " : isLoad() ? " from " : " into ");
+ printIRValueReference(OS, *Val, MST);
+ } else if (const PseudoSourceValue *PVal = getPseudoValue()) {
+ OS << ((isLoad() && isStore()) ? " on " : isLoad() ? " from " : " into ");
+ assert(PVal && "Expected a pseudo source value");
+ switch (PVal->kind()) {
+ case PseudoSourceValue::Stack:
+ OS << "stack";
+ break;
+ case PseudoSourceValue::GOT:
+ OS << "got";
+ break;
+ case PseudoSourceValue::JumpTable:
+ OS << "jump-table";
+ break;
+ case PseudoSourceValue::ConstantPool:
+ OS << "constant-pool";
+ break;
+ case PseudoSourceValue::FixedStack: {
+ int FrameIndex = cast<FixedStackPseudoSourceValue>(PVal)->getFrameIndex();
+ bool IsFixed = true;
+ printFrameIndex(OS, FrameIndex, IsFixed, MFI);
+ break;
+ }
+ case PseudoSourceValue::GlobalValueCallEntry:
+ OS << "call-entry ";
+ cast<GlobalValuePseudoSourceValue>(PVal)->getValue()->printAsOperand(
+ OS, /*PrintType=*/false, MST);
+ break;
+ case PseudoSourceValue::ExternalSymbolCallEntry:
+ OS << "call-entry &";
+ printLLVMNameWithoutPrefix(
+ OS, cast<ExternalSymbolPseudoSourceValue>(PVal)->getSymbol());
+ break;
+ case PseudoSourceValue::TargetCustom:
+ // FIXME: This is not necessarily the correct MIR serialization format for
+ // a custom pseudo source value, but at least it allows
+ // -print-machineinstrs to work on a target with custom pseudo source
+ // values.
+ OS << "custom ";
+ PVal->printCustom(OS);
+ break;
+ }
}
-
- // Print AA noalias scope info.
- if (const MDNode *NoAliasInfo = getAAInfo().NoAlias) {
- OS << "(noalias=";
- if (NoAliasInfo->getNumOperands() > 0)
- for (unsigned i = 0, ie = NoAliasInfo->getNumOperands(); i != ie; ++i) {
- NoAliasInfo->getOperand(i)->printAsOperand(OS, MST);
- if (i != ie - 1)
- OS << ",";
- }
- else
- OS << "<unknown>";
- OS << ")";
+ MachineOperand::printOperandOffset(OS, getOffset());
+ if (getBaseAlignment() != getSize())
+ OS << ", align " << getBaseAlignment();
+ auto AAInfo = getAAInfo();
+ if (AAInfo.TBAA) {
+ OS << ", !tbaa ";
+ AAInfo.TBAA->printAsOperand(OS, MST);
}
-
- if (const MDNode *Ranges = getRanges()) {
- unsigned NumRanges = Ranges->getNumOperands();
- if (NumRanges != 0) {
- OS << "(ranges=";
-
- for (unsigned I = 0; I != NumRanges; ++I) {
- Ranges->getOperand(I)->printAsOperand(OS, MST);
- if (I != NumRanges - 1)
- OS << ',';
- }
-
- OS << ')';
- }
+ if (AAInfo.Scope) {
+ OS << ", !alias.scope ";
+ AAInfo.Scope->printAsOperand(OS, MST);
}
+ if (AAInfo.NoAlias) {
+ OS << ", !noalias ";
+ AAInfo.NoAlias->printAsOperand(OS, MST);
+ }
+ if (getRanges()) {
+ OS << ", !range ";
+ getRanges()->printAsOperand(OS, MST);
+ }
+ // FIXME: Implement addrspace printing/parsing in MIR.
+ // For now, print this even though parsing it is not available in MIR.
+ if (unsigned AS = getAddrSpace())
+ OS << ", addrspace " << AS;
- if (isNonTemporal())
- OS << "(nontemporal)";
- if (isDereferenceable())
- OS << "(dereferenceable)";
- if (isInvariant())
- OS << "(invariant)";
- if (getFlags() & MOTargetFlag1)
- OS << "(flag1)";
- if (getFlags() & MOTargetFlag2)
- OS << "(flag2)";
- if (getFlags() & MOTargetFlag3)
- OS << "(flag3)";
+ OS << ')';
}
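The new MachineMemOperand::print overload threads through everything the verbose printer needs: a slot tracker, a scratch vector for sync-scope names, the LLVMContext, and optionally MachineFrameInfo and TargetInstrInfo for stack-object and target-flag names (the two-argument overload above falls back to dummy values). A sketch of calling it from printing code that already has a MachineFunction in hand; it assumes the usual LLVM headers and that MF, MMO, and OS are in scope.

// Sketch only: MF is a MachineFunction, MMO a MachineMemOperand*, OS a
// raw_ostream; all assumed to be available in the surrounding code.
ModuleSlotTracker MST(MF.getFunction().getParent());
SmallVector<StringRef, 8> SSNs;                  // cache of sync-scope names
const LLVMContext &Ctx = MF.getFunction().getContext();
const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();

MMO->print(OS, MST, SSNs, Ctx, &MFI, TII);       // e.g. "(load 4 from %ir.p)"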
diff --git a/contrib/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/contrib/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
index ca4452218da1..906d5560d568 100644
--- a/contrib/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
@@ -27,7 +27,8 @@ DiagnosticInfoMIROptimization::MachineArgument::MachineArgument(
Key = MKey;
raw_string_ostream OS(Val);
- MI.print(OS, /*SkipOpers=*/false, /*SkipDebugLoc=*/true);
+ MI.print(OS, /*IsStandalone=*/true, /*SkipOpers=*/false,
+ /*SkipDebugLoc=*/true);
}
Optional<uint64_t>
diff --git a/contrib/llvm/lib/CodeGen/MachineOutliner.cpp b/contrib/llvm/lib/CodeGen/MachineOutliner.cpp
index e4eb8802ac66..28e4e2c6c87a 100644
--- a/contrib/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -25,9 +25,8 @@
///
/// Targets must implement
/// * getOutliningCandidateInfo
-/// * insertOutlinerEpilogue
+/// * buildOutlinedFrame
/// * insertOutlinedCall
-/// * insertOutlinerPrologue
/// * isFunctionSafeToOutlineFrom
///
/// in order to make use of the MachineOutliner.
@@ -56,18 +55,22 @@
/// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
///
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/MachineOutliner.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Mangler.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <functional>
@@ -80,121 +83,23 @@
using namespace llvm;
using namespace ore;
+using namespace outliner;
STATISTIC(NumOutlined, "Number of candidates outlined");
STATISTIC(FunctionsCreated, "Number of functions created");
-namespace {
-
-/// \brief An individual sequence of instructions to be replaced with a call to
-/// an outlined function.
-struct Candidate {
-private:
- /// The start index of this \p Candidate in the instruction list.
- unsigned StartIdx;
-
- /// The number of instructions in this \p Candidate.
- unsigned Len;
-
-public:
- /// Set to false if the candidate overlapped with another candidate.
- bool InCandidateList = true;
-
- /// \brief The index of this \p Candidate's \p OutlinedFunction in the list of
- /// \p OutlinedFunctions.
- unsigned FunctionIdx;
-
- /// Contains all target-specific information for this \p Candidate.
- TargetInstrInfo::MachineOutlinerInfo MInfo;
-
- /// Return the number of instructions in this Candidate.
- unsigned getLength() const { return Len; }
-
- /// Return the start index of this candidate.
- unsigned getStartIdx() const { return StartIdx; }
-
- // Return the end index of this candidate.
- unsigned getEndIdx() const { return StartIdx + Len - 1; }
-
- /// \brief The number of instructions that would be saved by outlining every
- /// candidate of this type.
- ///
- /// This is a fixed value which is not updated during the candidate pruning
- /// process. It is only used for deciding which candidate to keep if two
- /// candidates overlap. The true benefit is stored in the OutlinedFunction
- /// for some given candidate.
- unsigned Benefit = 0;
-
- Candidate(unsigned StartIdx, unsigned Len, unsigned FunctionIdx)
- : StartIdx(StartIdx), Len(Len), FunctionIdx(FunctionIdx) {}
-
- Candidate() {}
-
- /// \brief Used to ensure that \p Candidates are outlined in an order that
- /// preserves the start and end indices of other \p Candidates.
- bool operator<(const Candidate &RHS) const {
- return getStartIdx() > RHS.getStartIdx();
- }
-};
-
-/// \brief The information necessary to create an outlined function for some
-/// class of candidate.
-struct OutlinedFunction {
-
-private:
- /// The number of candidates for this \p OutlinedFunction.
- unsigned OccurrenceCount = 0;
-
-public:
- std::vector<std::shared_ptr<Candidate>> Candidates;
-
- /// The actual outlined function created.
- /// This is initialized after we go through and create the actual function.
- MachineFunction *MF = nullptr;
-
- /// A number assigned to this function which appears at the end of its name.
- unsigned Name;
+// Set to true if the user wants the outliner to run on linkonceodr linkage
+// functions. This is off by default because the linker can dedupe linkonceodr
+// functions and the outliner is confined to a single module (modulo LTO). It
+// should, however, be the default behaviour in LTO.
+static cl::opt<bool> EnableLinkOnceODROutlining(
+ "enable-linkonceodr-outlining",
+ cl::Hidden,
+ cl::desc("Enable the machine outliner on linkonceodr functions"),
+ cl::init(false));
- /// \brief The sequence of integers corresponding to the instructions in this
- /// function.
- std::vector<unsigned> Sequence;
-
- /// Contains all target-specific information for this \p OutlinedFunction.
- TargetInstrInfo::MachineOutlinerInfo MInfo;
-
- /// Return the number of candidates for this \p OutlinedFunction.
- unsigned getOccurrenceCount() { return OccurrenceCount; }
-
- /// Decrement the occurrence count of this OutlinedFunction and return the
- /// new count.
- unsigned decrement() {
- assert(OccurrenceCount > 0 && "Can't decrement an empty function!");
- OccurrenceCount--;
- return getOccurrenceCount();
- }
-
- /// \brief Return the number of instructions it would take to outline this
- /// function.
- unsigned getOutliningCost() {
- return (OccurrenceCount * MInfo.CallOverhead) + Sequence.size() +
- MInfo.FrameOverhead;
- }
-
- /// \brief Return the number of instructions that would be saved by outlining
- /// this function.
- unsigned getBenefit() {
- unsigned NotOutlinedCost = OccurrenceCount * Sequence.size();
- unsigned OutlinedCost = getOutliningCost();
- return (NotOutlinedCost < OutlinedCost) ? 0
- : NotOutlinedCost - OutlinedCost;
- }
-
- OutlinedFunction(unsigned Name, unsigned OccurrenceCount,
- const std::vector<unsigned> &Sequence,
- TargetInstrInfo::MachineOutlinerInfo &MInfo)
- : OccurrenceCount(OccurrenceCount), Name(Name), Sequence(Sequence),
- MInfo(MInfo) {}
-};
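The removed OutlinedFunction members capture the outliner's cost model: outlining every occurrence costs one call per occurrence plus one copy of the sequence plus the frame overhead, and the benefit is how much that saves versus leaving every occurrence inline, clamped at zero. A standalone restatement of that arithmetic, with illustrative field names:

// Illustrative cost model for one class of outlining candidates.
struct OutliningCost {
  unsigned OccurrenceCount; // how many times the sequence appears
  unsigned SequenceLength;  // instructions in one occurrence
  unsigned CallOverhead;    // instructions needed per outlined call
  unsigned FrameOverhead;   // extra instructions in the outlined body
};

static unsigned outliningCost(const OutliningCost &C) {
  return C.OccurrenceCount * C.CallOverhead + C.SequenceLength +
         C.FrameOverhead;
}

static unsigned notOutliningCost(const OutliningCost &C) {
  return C.OccurrenceCount * C.SequenceLength;
}

// Benefit is the saving, clamped at zero; candidates with benefit < 1 are
// rejected by the outliner.
static unsigned benefit(const OutliningCost &C) {
  unsigned NotOutlined = notOutliningCost(C);
  unsigned Outlined = outliningCost(C);
  return NotOutlined < Outlined ? 0 : NotOutlined - Outlined;
}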
+namespace {
/// Represents an undefined index in the suffix tree.
const unsigned EmptyIdx = -1;
@@ -242,7 +147,7 @@ struct SuffixTreeNode {
/// For all other nodes, this is ignored.
unsigned SuffixIdx = EmptyIdx;
- /// \brief For internal nodes, a pointer to the internal node representing
+ /// For internal nodes, a pointer to the internal node representing
/// the same sequence with the first character chopped off.
///
/// This acts as a shortcut in Ukkonen's algorithm. One of the things that
@@ -356,7 +261,7 @@ private:
/// The end index of each leaf in the tree.
unsigned LeafEndIdx = -1;
- /// \brief Helper struct which keeps track of the next insertion point in
+ /// Helper struct which keeps track of the next insertion point in
/// Ukkonen's algorithm.
struct ActiveState {
/// The next node to insert at.
@@ -369,7 +274,7 @@ private:
unsigned Len = 0;
};
- /// \brief The point the next insertion will take place at in the
+ /// The point the next insertion will take place at in the
/// construction algorithm.
ActiveState Active;
@@ -416,7 +321,7 @@ private:
return N;
}
- /// \brief Set the suffix indices of the leaves to the start indices of their
+ /// Set the suffix indices of the leaves to the start indices of their
/// respective suffixes. Also stores each leaf in \p LeafVector at its
/// respective suffix index.
///
@@ -454,7 +359,7 @@ private:
}
}
- /// \brief Construct the suffix tree for the prefix of the input ending at
+ /// Construct the suffix tree for the prefix of the input ending at
/// \p EndIdx.
///
/// Used to construct the full suffix tree iteratively. At the end of each
@@ -615,16 +520,16 @@ public:
}
};
-/// \brief Maps \p MachineInstrs to unsigned integers and stores the mappings.
+/// Maps \p MachineInstrs to unsigned integers and stores the mappings.
struct InstructionMapper {
- /// \brief The next available integer to assign to a \p MachineInstr that
+ /// The next available integer to assign to a \p MachineInstr that
/// cannot be outlined.
///
/// Set to -3 for compatibility with \p DenseMapInfo<unsigned>.
unsigned IllegalInstrNumber = -3;
- /// \brief The next available integer to assign to a \p MachineInstr that can
+ /// The next available integer to assign to a \p MachineInstr that can
/// be outlined.
unsigned LegalInstrNumber = 0;
@@ -639,11 +544,11 @@ struct InstructionMapper {
/// The vector of unsigned integers that the module is mapped to.
std::vector<unsigned> UnsignedVec;
- /// \brief Stores the location of the instruction associated with the integer
+ /// Stores the location of the instruction associated with the integer
/// at index i in \p UnsignedVec for each index i.
std::vector<MachineBasicBlock::iterator> InstrList;
- /// \brief Maps \p *It to a legal integer.
+ /// Maps \p *It to a legal integer.
///
/// Updates \p InstrList, \p UnsignedVec, \p InstructionIntegerMap,
/// \p IntegerInstructionMap, and \p LegalInstrNumber.
@@ -706,7 +611,7 @@ struct InstructionMapper {
return MINumber;
}
- /// \brief Transforms a \p MachineBasicBlock into a \p vector of \p unsigneds
+ /// Transforms a \p MachineBasicBlock into a \p vector of \p unsigneds
/// and appends it to \p UnsignedVec and \p InstrList.
///
/// Two instructions are assigned the same integer if they are identical.
@@ -720,20 +625,29 @@ struct InstructionMapper {
void convertToUnsignedVec(MachineBasicBlock &MBB,
const TargetRegisterInfo &TRI,
const TargetInstrInfo &TII) {
+ unsigned Flags = TII.getMachineOutlinerMBBFlags(MBB);
+
for (MachineBasicBlock::iterator It = MBB.begin(), Et = MBB.end(); It != Et;
It++) {
// Keep track of where this instruction is in the module.
- switch (TII.getOutliningType(*It)) {
- case TargetInstrInfo::MachineOutlinerInstrType::Illegal:
+ switch (TII.getOutliningType(It, Flags)) {
+ case InstrType::Illegal:
mapToIllegalUnsigned(It);
break;
- case TargetInstrInfo::MachineOutlinerInstrType::Legal:
+ case InstrType::Legal:
mapToLegalUnsigned(It);
break;
- case TargetInstrInfo::MachineOutlinerInstrType::Invisible:
+ case InstrType::LegalTerminator:
+ mapToLegalUnsigned(It);
+ InstrList.push_back(It);
+ UnsignedVec.push_back(IllegalInstrNumber);
+ IllegalInstrNumber--;
+ break;
+
+ case InstrType::Invisible:
break;
}
}
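convertToUnsignedVec turns each basic block into a string of integers: instructions that are legal to outline share an id when they are identical, while illegal instructions get a fresh, never-repeated id so no repeated substring can cross them (the new LegalTerminator case maps the terminator legally and then pushes such a sentinel). A self-contained sketch of the mapping, keyed by a hypothetical string form of each instruction:

#include <string>
#include <unordered_map>
#include <vector>

struct InstrMapper {
  // Legal ids count up from 0; illegal ids count down from a large value so
  // the two ranges never collide (the pass starts the illegal counter at -3
  // for DenseMap compatibility).
  unsigned NextLegalId = 0;
  unsigned NextIllegalId = static_cast<unsigned>(-3);
  std::unordered_map<std::string, unsigned> LegalIds;
  std::vector<unsigned> UnsignedVec;

  void mapLegal(const std::string &Key) {
    auto It = LegalIds.find(Key);
    unsigned Id = (It != LegalIds.end()) ? It->second
                                         : (LegalIds[Key] = NextLegalId++);
    UnsignedVec.push_back(Id);
  }

  void mapIllegal() {
    // Each illegal instruction gets a unique id, so no repeated substring of
    // the mapped string can ever include it.
    UnsignedVec.push_back(NextIllegalId--);
  }
};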
@@ -757,7 +671,7 @@ struct InstructionMapper {
}
};
-/// \brief An interprocedural pass which finds repeated sequences of
+/// An interprocedural pass which finds repeated sequences of
/// instructions and replaces them with calls to functions.
///
/// Each instruction is mapped to an unsigned integer and placed in a string.
@@ -770,10 +684,19 @@ struct MachineOutliner : public ModulePass {
static char ID;
- /// \brief Set to true if the outliner should consider functions with
+ /// Set to true if the outliner should consider functions with
/// linkonceodr linkage.
bool OutlineFromLinkOnceODRs = false;
+ /// Set to true if the outliner should run on all functions in the module
+ /// considered safe for outlining.
+ /// Set to true by default for compatibility with llc's -run-pass option.
+ /// Set when the pass is constructed in TargetPassConfig.
+ bool RunOnAllFunctions = true;
+
+ // Collection of IR functions created by the outliner.
+ std::vector<Function *> CreatedIRFunctions;
+
StringRef getPassName() const override { return "Machine Outliner"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -783,27 +706,35 @@ struct MachineOutliner : public ModulePass {
ModulePass::getAnalysisUsage(AU);
}
- MachineOutliner(bool OutlineFromLinkOnceODRs = false)
- : ModulePass(ID), OutlineFromLinkOnceODRs(OutlineFromLinkOnceODRs) {
+ MachineOutliner() : ModulePass(ID) {
initializeMachineOutlinerPass(*PassRegistry::getPassRegistry());
}
+ /// Remark output explaining that not outlining a set of candidates would be
+ /// better than outlining that set.
+ void emitNotOutliningCheaperRemark(
+ unsigned StringLen, std::vector<Candidate> &CandidatesForRepeatedSeq,
+ OutlinedFunction &OF);
+
+ /// Remark output explaining that a function was outlined.
+ void emitOutlinedFunctionRemark(OutlinedFunction &OF);
+
/// Find all repeated substrings that satisfy the outlining cost model.
///
/// If a substring appears at least twice, then it must be represented by
- /// an internal node which appears in at least two suffixes. Each suffix is
- /// represented by a leaf node. To do this, we visit each internal node in
- /// the tree, using the leaf children of each internal node. If an internal
- /// node represents a beneficial substring, then we use each of its leaf
- /// children to find the locations of its substring.
+ /// an internal node which appears in at least two suffixes. Each suffix
+ /// is represented by a leaf node. To do this, we visit each internal node
+ /// in the tree, using the leaf children of each internal node. If an
+ /// internal node represents a beneficial substring, then we use each of
+ /// its leaf children to find the locations of its substring.
///
/// \param ST A suffix tree to query.
/// \param TII TargetInstrInfo for the target.
/// \param Mapper Contains outlining mapping information.
/// \param[out] CandidateList Filled with candidates representing each
/// beneficial substring.
- /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions each
- /// type of candidate.
+ /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions, one
+ /// for each type of candidate.
///
/// \returns The length of the longest candidate found.
unsigned
@@ -812,7 +743,7 @@ struct MachineOutliner : public ModulePass {
std::vector<std::shared_ptr<Candidate>> &CandidateList,
std::vector<OutlinedFunction> &FunctionList);
- /// \brief Replace the sequences of instructions represented by the
+ /// Replace the sequences of instructions represented by the
/// \p Candidates in \p CandidateList with calls to \p MachineFunctions
/// described in \p FunctionList.
///
@@ -852,7 +783,7 @@ struct MachineOutliner : public ModulePass {
/// Removes \p C from the candidate list, and updates its \p OutlinedFunction.
void prune(Candidate &C, std::vector<OutlinedFunction> &FunctionList);
- /// \brief Remove any overlapping candidates that weren't handled by the
+ /// Remove any overlapping candidates that weren't handled by the
/// suffix tree's pruning method.
///
/// Pruning from the suffix tree doesn't necessarily remove all overlaps.
@@ -873,6 +804,16 @@ struct MachineOutliner : public ModulePass {
/// Construct a suffix tree on the instructions in \p M and outline repeated
/// strings from that tree.
bool runOnModule(Module &M) override;
+
+ /// Return a DISubprogram for OF if one exists, and null otherwise. Helper
+ /// function for remark emission.
+ DISubprogram *getSubprogramOrNull(const OutlinedFunction &OF) {
+ DISubprogram *SP;
+ for (const std::shared_ptr<Candidate> &C : OF.Candidates)
+ if (C && C->getMF() && (SP = C->getMF()->getFunction().getSubprogram()))
+ return SP;
+ return nullptr;
+ }
};
} // Anonymous namespace.
@@ -880,8 +821,10 @@ struct MachineOutliner : public ModulePass {
char MachineOutliner::ID = 0;
namespace llvm {
-ModulePass *createMachineOutlinerPass(bool OutlineFromLinkOnceODRs) {
- return new MachineOutliner(OutlineFromLinkOnceODRs);
+ModulePass *createMachineOutlinerPass(bool RunOnAllFunctions) {
+ MachineOutliner *OL = new MachineOutliner();
+ OL->RunOnAllFunctions = RunOnAllFunctions;
+ return OL;
}
} // namespace llvm
@@ -889,6 +832,65 @@ ModulePass *createMachineOutlinerPass(bool OutlineFromLinkOnceODRs) {
INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false,
false)
+void MachineOutliner::emitNotOutliningCheaperRemark(
+ unsigned StringLen, std::vector<Candidate> &CandidatesForRepeatedSeq,
+ OutlinedFunction &OF) {
+ Candidate &C = CandidatesForRepeatedSeq.front();
+ MachineOptimizationRemarkEmitter MORE(*(C.getMF()), nullptr);
+ MORE.emit([&]() {
+ MachineOptimizationRemarkMissed R(DEBUG_TYPE, "NotOutliningCheaper",
+ C.front()->getDebugLoc(), C.getMBB());
+ R << "Did not outline " << NV("Length", StringLen) << " instructions"
+ << " from " << NV("NumOccurrences", CandidatesForRepeatedSeq.size())
+ << " locations."
+ << " Bytes from outlining all occurrences ("
+ << NV("OutliningCost", OF.getOutliningCost()) << ")"
+ << " >= Unoutlined instruction bytes ("
+ << NV("NotOutliningCost", OF.getNotOutlinedCost()) << ")"
+ << " (Also found at: ";
+
+ // Tell the user the other places the candidate was found.
+ for (unsigned i = 1, e = CandidatesForRepeatedSeq.size(); i < e; i++) {
+ R << NV((Twine("OtherStartLoc") + Twine(i)).str(),
+ CandidatesForRepeatedSeq[i].front()->getDebugLoc());
+ if (i != e - 1)
+ R << ", ";
+ }
+
+ R << ")";
+ return R;
+ });
+}
+
+void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
+ MachineBasicBlock *MBB = &*OF.MF->begin();
+ MachineOptimizationRemarkEmitter MORE(*OF.MF, nullptr);
+ MachineOptimizationRemark R(DEBUG_TYPE, "OutlinedFunction",
+ MBB->findDebugLoc(MBB->begin()), MBB);
+ R << "Saved " << NV("OutliningBenefit", OF.getBenefit()) << " bytes by "
+ << "outlining " << NV("Length", OF.Sequence.size()) << " instructions "
+ << "from " << NV("NumOccurrences", OF.getOccurrenceCount())
+ << " locations. "
+ << "(Found at: ";
+
+ // Tell the user the other places the candidate was found.
+ for (size_t i = 0, e = OF.Candidates.size(); i < e; i++) {
+
+ // Skip over things that were pruned.
+ if (!OF.Candidates[i]->InCandidateList)
+ continue;
+
+ R << NV((Twine("StartLoc") + Twine(i)).str(),
+ OF.Candidates[i]->front()->getDebugLoc());
+ if (i != e - 1)
+ R << ", ";
+ }
+
+ R << ")";
+
+ MORE.emit(R);
+}
+
unsigned MachineOutliner::findCandidates(
SuffixTree &ST, const TargetInstrInfo &TII, InstructionMapper &Mapper,
std::vector<std::shared_ptr<Candidate>> &CandidateList,
@@ -923,14 +925,6 @@ unsigned MachineOutliner::findCandidates(
// this vector.
std::vector<Candidate> CandidatesForRepeatedSeq;
- // Describes the start and end point of each candidate. This allows the
- // target to infer some information about each occurrence of each repeated
- // sequence.
- // FIXME: CandidatesForRepeatedSeq and this should be combined.
- std::vector<
- std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
- RepeatedSequenceLocs;
-
// Figure out the call overhead for each instance of the sequence.
for (auto &ChildPair : Parent.Children) {
SuffixTreeNode *M = ChildPair.second;
@@ -966,17 +960,18 @@ unsigned MachineOutliner::findCandidates(
CandidatesForRepeatedSeq.end(),
[&StartIdx, &EndIdx](const Candidate &C) {
return (EndIdx < C.getStartIdx() ||
- StartIdx > C.getEndIdx());
+ StartIdx > C.getEndIdx());
})) {
// It doesn't overlap with anything, so we can outline it.
// Each sequence is over [StartIt, EndIt].
+ // Save the candidate and its location.
+
MachineBasicBlock::iterator StartIt = Mapper.InstrList[StartIdx];
MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx];
- // Save the candidate and its location.
- CandidatesForRepeatedSeq.emplace_back(StartIdx, StringLen,
+ CandidatesForRepeatedSeq.emplace_back(StartIdx, StringLen, StartIt,
+ EndIt, StartIt->getParent(),
FunctionList.size());
- RepeatedSequenceLocs.emplace_back(std::make_pair(StartIt, EndIt));
}
}
}
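The std::all_of check above accepts a new occurrence only if its [StartIdx, EndIdx] range is disjoint from every candidate already collected for this repeated sequence. The same interval test in isolation, with Candidate reduced to a start index and a length:

#include <algorithm>
#include <vector>

struct Cand {
  unsigned StartIdx;
  unsigned Len;
  unsigned getStartIdx() const { return StartIdx; }
  unsigned getEndIdx() const { return StartIdx + Len - 1; }
};

// True iff [StartIdx, EndIdx] overlaps none of the existing candidates.
static bool noOverlap(const std::vector<Cand> &Existing, unsigned StartIdx,
                      unsigned EndIdx) {
  return std::all_of(Existing.begin(), Existing.end(),
                     [&](const Cand &C) {
                       return EndIdx < C.getStartIdx() ||
                              StartIdx > C.getEndIdx();
                     });
}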
@@ -984,69 +979,33 @@ unsigned MachineOutliner::findCandidates(
// We've found something we might want to outline.
// Create an OutlinedFunction to store it and check if it'd be beneficial
// to outline.
- TargetInstrInfo::MachineOutlinerInfo MInfo =
- TII.getOutlininingCandidateInfo(RepeatedSequenceLocs);
+ OutlinedFunction OF =
+ TII.getOutliningCandidateInfo(CandidatesForRepeatedSeq);
+
+ // If we deleted every candidate, then there's nothing to outline.
+ if (OF.Candidates.empty())
+ continue;
+
std::vector<unsigned> Seq;
for (unsigned i = Leaf->SuffixIdx; i < Leaf->SuffixIdx + StringLen; i++)
Seq.push_back(ST.Str[i]);
- OutlinedFunction OF(FunctionList.size(), CandidatesForRepeatedSeq.size(),
- Seq, MInfo);
- unsigned Benefit = OF.getBenefit();
+ OF.Sequence = Seq;
+ OF.Name = FunctionList.size();
// Is it better to outline this candidate than not?
- if (Benefit < 1) {
- // Outlining this candidate would take more instructions than not
- // outlining.
- // Emit a remark explaining why we didn't outline this candidate.
- std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator> C =
- RepeatedSequenceLocs[0];
- MachineOptimizationRemarkEmitter MORE(
- *(C.first->getParent()->getParent()), nullptr);
- MORE.emit([&]() {
- MachineOptimizationRemarkMissed R(DEBUG_TYPE, "NotOutliningCheaper",
- C.first->getDebugLoc(),
- C.first->getParent());
- R << "Did not outline " << NV("Length", StringLen) << " instructions"
- << " from " << NV("NumOccurrences", RepeatedSequenceLocs.size())
- << " locations."
- << " Instructions from outlining all occurrences ("
- << NV("OutliningCost", OF.getOutliningCost()) << ")"
- << " >= Unoutlined instruction count ("
- << NV("NotOutliningCost", StringLen * OF.getOccurrenceCount()) << ")"
- << " (Also found at: ";
-
- // Tell the user the other places the candidate was found.
- for (unsigned i = 1, e = RepeatedSequenceLocs.size(); i < e; i++) {
- R << NV((Twine("OtherStartLoc") + Twine(i)).str(),
- RepeatedSequenceLocs[i].first->getDebugLoc());
- if (i != e - 1)
- R << ", ";
- }
-
- R << ")";
- return R;
- });
-
- // Move to the next candidate.
+ if (OF.getBenefit() < 1) {
+ emitNotOutliningCheaperRemark(StringLen, CandidatesForRepeatedSeq, OF);
continue;
}
if (StringLen > MaxLen)
MaxLen = StringLen;
- // At this point, the candidate class is seen as beneficial. Set their
- // benefit values and save them in the candidate list.
- std::vector<std::shared_ptr<Candidate>> CandidatesForFn;
- for (Candidate &C : CandidatesForRepeatedSeq) {
- C.Benefit = Benefit;
- C.MInfo = MInfo;
- std::shared_ptr<Candidate> Cptr = std::make_shared<Candidate>(C);
- CandidateList.push_back(Cptr);
- CandidatesForFn.push_back(Cptr);
- }
-
+ // The function is beneficial. Save its candidates to the candidate list
+ // for pruning.
+ for (std::shared_ptr<Candidate> &C : OF.Candidates)
+ CandidateList.push_back(C);
FunctionList.push_back(OF);
- FunctionList.back().Candidates = CandidatesForFn;
// Move to the next function.
Parent.IsInTree = false;
@@ -1067,11 +1026,11 @@ void MachineOutliner::prune(Candidate &C,
// Remove C from the CandidateList.
C.InCandidateList = false;
- DEBUG(dbgs() << "- Removed a Candidate \n";
- dbgs() << "--- Num fns left for candidate: " << F.getOccurrenceCount()
- << "\n";
- dbgs() << "--- Candidate's functions's benefit: " << F.getBenefit()
- << "\n";);
+ LLVM_DEBUG(dbgs() << "- Removed a Candidate \n";
+ dbgs() << "--- Num fns left for candidate: "
+ << F.getOccurrenceCount() << "\n";
+ dbgs() << "--- Candidate's functions's benefit: " << F.getBenefit()
+ << "\n";);
}
void MachineOutliner::pruneOverlaps(
@@ -1119,7 +1078,7 @@ void MachineOutliner::pruneOverlaps(
if (C1.getStartIdx() > MaxCandidateLen)
FarthestPossibleIdx = C1.getStartIdx() - MaxCandidateLen;
- // Compare against the candidates in the list that start at at most
+ // Compare against the candidates in the list that start at most
// FarthestPossibleIdx indices away from C1. There are at most
// MaxCandidateLen of these.
for (auto Sit = It + 1; Sit != Et; Sit++) {
@@ -1205,9 +1164,20 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,
// NOTE: If this is linkonceodr, then we can take advantage of linker deduping
// which gives us better results when we outline from linkonceodr functions.
- F->setLinkage(GlobalValue::PrivateLinkage);
+ F->setLinkage(GlobalValue::InternalLinkage);
F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+ // FIXME: Set nounwind, so we don't generate eh_frame? Haven't verified it's
+ // necessary.
+
+ // Set optsize/minsize, so we don't insert padding between outlined
+ // functions.
+ F->addFnAttr(Attribute::OptimizeForSize);
+ F->addFnAttr(Attribute::MinSize);
+
+ // Save F so that we can add debug info later if we need to.
+ CreatedIRFunctions.push_back(F);
+
BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
IRBuilder<> Builder(EntryBB);
Builder.CreateRetVoid();
@@ -1221,8 +1191,6 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,
// Insert the new function into the module.
MF.insert(MF.begin(), &MBB);
- TII.insertOutlinerPrologue(MBB, MF, OF.MInfo);
-
// Copy over the instructions for the function using the integer mappings in
// its sequence.
for (unsigned Str : OF.Sequence) {
@@ -1231,13 +1199,53 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,
NewMI->dropMemRefs();
// Don't keep debug information for outlined instructions.
- // FIXME: This means outlined functions are currently undebuggable.
NewMI->setDebugLoc(DebugLoc());
MBB.insert(MBB.end(), NewMI);
}
- TII.insertOutlinerEpilogue(MBB, MF, OF.MInfo);
+ TII.buildOutlinedFrame(MBB, MF, OF);
+
+ // If there's a DISubprogram associated with this outlined function, then
+ // emit debug info for the outlined function.
+ if (DISubprogram *SP = getSubprogramOrNull(OF)) {
+ // We have a DISubprogram. Get its DICompileUnit.
+ DICompileUnit *CU = SP->getUnit();
+ DIBuilder DB(M, true, CU);
+ DIFile *Unit = SP->getFile();
+ Mangler Mg;
+
+ // Walk over each IR function we created in the outliner and create
+ // DISubprograms for each function.
+ for (Function *F : CreatedIRFunctions) {
+ // Get the mangled name of the function for the linkage name.
+ std::string Dummy;
+ llvm::raw_string_ostream MangledNameStream(Dummy);
+ Mg.getNameWithPrefix(MangledNameStream, F, false);
+
+ DISubprogram *SP = DB.createFunction(
+ Unit /* Context */, F->getName(), StringRef(MangledNameStream.str()),
+ Unit /* File */,
+ 0 /* Line 0 is reserved for compiler-generated code. */,
+ DB.createSubroutineType(
+ DB.getOrCreateTypeArray(None)), /* void type */
+ false, true, 0, /* Line 0 is reserved for compiler-generated code. */
+ DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
+ true /* Outlined code is optimized code by definition. */);
+
+ // Don't add any new variables to the subprogram.
+ DB.finalizeSubprogram(SP);
+
+ // Attach subprogram to the function.
+ F->setSubprogram(SP);
+ }
+
+ // We're done with the DIBuilder.
+ DB.finalize();
+ }
+ // Outlined functions shouldn't preserve liveness.
+ MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness);
+ MF.getRegInfo().freezeReservedRegs(MF);
return &MF;
}
@@ -1260,79 +1268,73 @@ bool MachineOutliner::outline(
if (OF.getBenefit() < 1)
continue;
- // If not, then outline it.
- assert(C.getStartIdx() < Mapper.InstrList.size() &&
- "Candidate out of bounds!");
- MachineBasicBlock *MBB = (*Mapper.InstrList[C.getStartIdx()]).getParent();
- MachineBasicBlock::iterator StartIt = Mapper.InstrList[C.getStartIdx()];
- unsigned EndIdx = C.getEndIdx();
-
- assert(EndIdx < Mapper.InstrList.size() && "Candidate out of bounds!");
- MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx];
- assert(EndIt != MBB->end() && "EndIt out of bounds!");
-
- EndIt++; // Erase needs one past the end index.
-
// Does this candidate have a function yet?
if (!OF.MF) {
OF.MF = createOutlinedFunction(M, OF, Mapper);
- MachineBasicBlock *MBB = &*OF.MF->begin();
-
- // Output a remark telling the user that an outlined function was created,
- // and explaining where it came from.
- MachineOptimizationRemarkEmitter MORE(*OF.MF, nullptr);
- MachineOptimizationRemark R(DEBUG_TYPE, "OutlinedFunction",
- MBB->findDebugLoc(MBB->begin()), MBB);
- R << "Saved " << NV("OutliningBenefit", OF.getBenefit())
- << " instructions by "
- << "outlining " << NV("Length", OF.Sequence.size()) << " instructions "
- << "from " << NV("NumOccurrences", OF.getOccurrenceCount())
- << " locations. "
- << "(Found at: ";
-
- // Tell the user the other places the candidate was found.
- for (size_t i = 0, e = OF.Candidates.size(); i < e; i++) {
-
- // Skip over things that were pruned.
- if (!OF.Candidates[i]->InCandidateList)
- continue;
-
- R << NV(
- (Twine("StartLoc") + Twine(i)).str(),
- Mapper.InstrList[OF.Candidates[i]->getStartIdx()]->getDebugLoc());
- if (i != e - 1)
- R << ", ";
- }
-
- R << ")";
-
- MORE.emit(R);
+ emitOutlinedFunctionRemark(OF);
FunctionsCreated++;
}
MachineFunction *MF = OF.MF;
+ MachineBasicBlock &MBB = *C.getMBB();
+ MachineBasicBlock::iterator StartIt = C.front();
+ MachineBasicBlock::iterator EndIt = C.back();
+ assert(StartIt != C.getMBB()->end() && "StartIt out of bounds!");
+ assert(EndIt != C.getMBB()->end() && "EndIt out of bounds!");
+
const TargetSubtargetInfo &STI = MF->getSubtarget();
const TargetInstrInfo &TII = *STI.getInstrInfo();
// Insert a call to the new function and erase the old sequence.
- TII.insertOutlinedCall(M, *MBB, StartIt, *MF, C.MInfo);
- StartIt = Mapper.InstrList[C.getStartIdx()];
- MBB->erase(StartIt, EndIt);
+ auto CallInst = TII.insertOutlinedCall(M, MBB, StartIt, *OF.MF, C);
+
+ // If the caller tracks liveness, then we need to make sure that anything
+ // we outline doesn't break liveness assumptions.
+ // The outlined functions themselves currently don't track liveness, but
+ // we should make sure that the ranges we yank things out of aren't
+ // wrong.
+ if (MBB.getParent()->getProperties().hasProperty(
+ MachineFunctionProperties::Property::TracksLiveness)) {
+ // Helper lambda for adding implicit def operands to the call instruction.
+ auto CopyDefs = [&CallInst](MachineInstr &MI) {
+ for (MachineOperand &MOP : MI.operands()) {
+ // Skip over anything that isn't a register.
+ if (!MOP.isReg())
+ continue;
+
+ // If it's a def, add it to the call instruction.
+ if (MOP.isDef())
+ CallInst->addOperand(
+ MachineOperand::CreateReg(MOP.getReg(), true, /* isDef = true */
+ true /* isImp = true */));
+ }
+ };
+
+ // Copy over the defs in the outlined range.
+ // First inst in outlined range <-- Anything that's defined in this
+ // ... .. range has to be added as an implicit
+ // Last inst in outlined range <-- def to the call instruction.
+ std::for_each(CallInst, std::next(EndIt), CopyDefs);
+ }
+ // Erase from the point after where the call was inserted up to, and
+ // including, the final instruction in the sequence.
+ // Erase needs one past the end, so we need std::next there too.
+ MBB.erase(std::next(StartIt), std::next(EndIt));
OutlinedSomething = true;
// Statistics.
NumOutlined++;
}
- DEBUG(dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n";);
+ LLVM_DEBUG(dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n";);
return OutlinedSomething;
}
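When the caller still tracks liveness, the CopyDefs lambda above walks from the inserted call through the end of the outlined range and attaches each register defined there to the call as an implicit def, so liveness at the call site is preserved. A reduced sketch of that bookkeeping over hypothetical operand records (the real code builds MachineOperand::CreateReg operands on the call instruction):

#include <vector>

struct Operand {
  unsigned Reg;
  bool IsReg;
  bool IsDef;
};

struct Inst {
  std::vector<Operand> Operands;
};

// Collect every register defined anywhere in the outlined range; the caller
// then marks each one as an implicit def of the inserted call.
static std::vector<unsigned>
defsToCopy(const std::vector<Inst> &OutlinedRange) {
  std::vector<unsigned> ImplicitDefs;
  for (const Inst &MI : OutlinedRange)
    for (const Operand &MOP : MI.Operands) {
      if (!MOP.IsReg)
        continue;
      if (MOP.IsDef)
        ImplicitDefs.push_back(MOP.Reg);
    }
  return ImplicitDefs;
}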
bool MachineOutliner::runOnModule(Module &M) {
-
- // Is there anything in the module at all?
+ // Check if there's anything in the module. If it's empty, then there's
+ // nothing to outline.
if (M.empty())
return false;
@@ -1342,25 +1344,67 @@ bool MachineOutliner::runOnModule(Module &M) {
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
const TargetInstrInfo *TII = STI.getInstrInfo();
+ // If the user passed -enable-machine-outliner=always or
+ // -enable-machine-outliner, the pass will run on all functions in the module.
+ // Otherwise, if the target supports default outlining, it will run on all
+ // functions deemed by the target to be worth outlining from by default. Tell
+ // the user how the outliner is running.
+ LLVM_DEBUG(
+ dbgs() << "Machine Outliner: Running on ";
+ if (RunOnAllFunctions)
+ dbgs() << "all functions";
+ else
+ dbgs() << "target-default functions";
+ dbgs() << "\n"
+ );
+
+ // If the user specifies that they want to outline from linkonceodrs, set
+ // it here.
+ OutlineFromLinkOnceODRs = EnableLinkOnceODROutlining;
+
InstructionMapper Mapper;
- // Build instruction mappings for each function in the module.
+ // Build instruction mappings for each function in the module. Start by
+ // iterating over each Function in M.
for (Function &F : M) {
- MachineFunction &MF = MMI.getOrCreateMachineFunction(F);
- // Is the function empty? Safe to outline from?
- if (F.empty() ||
- !TII->isFunctionSafeToOutlineFrom(MF, OutlineFromLinkOnceODRs))
+ // If there's nothing in F, then there's no reason to try and outline from
+ // it.
+ if (F.empty())
+ continue;
+
+ // There's something in F. Check if it has a MachineFunction associated with
+ // it.
+ MachineFunction *MF = MMI.getMachineFunction(F);
+
+ // If it doesn't, then there's nothing to outline from. Move to the next
+ // Function.
+ if (!MF)
+ continue;
+
+ if (!RunOnAllFunctions && !TII->shouldOutlineFromFunctionByDefault(*MF))
continue;
- // If it is, look at each MachineBasicBlock in the function.
- for (MachineBasicBlock &MBB : MF) {
+ // We have a MachineFunction. Ask the target if it's suitable for outlining.
+ // If it isn't, then move on to the next Function in the module.
+ if (!TII->isFunctionSafeToOutlineFrom(*MF, OutlineFromLinkOnceODRs))
+ continue;
- // Is there anything in MBB?
+ // We have a function suitable for outlining. Iterate over every
+ // MachineBasicBlock in MF and try to map its instructions to a list of
+ // unsigned integers.
+ for (MachineBasicBlock &MBB : *MF) {
+ // If there isn't anything in MBB, then there's no point in outlining from
+ // it.
if (MBB.empty())
continue;
- // If yes, map it.
+ // Check if MBB could be the target of an indirect branch. If it is, then
+ // we don't want to outline from it.
+ if (MBB.hasAddressTaken())
+ continue;
+
+ // MBB is suitable for outlining. Map it to a list of unsigneds.
Mapper.convertToUnsignedVec(MBB, *TRI, *TII);
}
}
@@ -1378,5 +1422,7 @@ bool MachineOutliner::runOnModule(Module &M) {
pruneOverlaps(CandidateList, FunctionList, Mapper, MaxCandidateLen, *TII);
// Outline each of the candidates and return true if something was outlined.
- return outline(M, CandidateList, FunctionList, Mapper);
+ bool OutlinedSomething = outline(M, CandidateList, FunctionList, Mapper);
+
+ return OutlinedSomething;
}
diff --git a/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp b/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp
index 18cb9af499a6..9bb00aaef86d 100644
--- a/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -10,14 +10,14 @@
// An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
//
// Software pipelining (SWP) is an instruction scheduling technique for loops
-// that overlap loop iterations and explioits ILP via a compiler transformation.
+// that overlaps loop iterations and exploits ILP via a compiler transformation.
//
// Swing Modulo Scheduling is an implementation of software pipelining
// that generates schedules that are near optimal in terms of initiation
// interval, register requirements, and stage count. See the papers:
//
// "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa,
-// A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Processings of the 1996
+// A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Proceedings of the 1996
// Conference on Parallel Architectures and Compilation Techniques.
//
// "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J.
@@ -93,6 +93,7 @@
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
@@ -125,6 +126,7 @@ using namespace llvm;
STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
STATISTIC(NumPipelined, "Number of loops software pipelined");
+STATISTIC(NumNodeOrderIssues, "Number of node order issues found");
/// A command line option to turn software pipelining on or off.
static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
@@ -138,7 +140,7 @@ static cl::opt<bool> EnableSWPOptSize("enable-pipeliner-opt-size",
/// A command line argument to limit minimum initial interval for pipelining.
static cl::opt<int> SwpMaxMii("pipeliner-max-mii",
- cl::desc("Size limit for the the MII."),
+ cl::desc("Size limit for the MII."),
cl::Hidden, cl::init(27));
/// A command line argument to limit the number of stages in the pipeline.
@@ -217,6 +219,7 @@ public:
}
private:
+ void preprocessPhiNodes(MachineBasicBlock &B);
bool canPipelineLoop(MachineLoop &L);
bool scheduleLoop(MachineLoop &L);
bool swingModuloScheduler(MachineLoop &L);
@@ -241,6 +244,8 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
struct NodeInfo {
int ASAP = 0;
int ALAP = 0;
+ int ZeroLatencyDepth = 0;
+ int ZeroLatencyHeight = 0;
NodeInfo() = default;
};
@@ -313,15 +318,27 @@ public:
/// Return the latest time an instruction my be scheduled.
int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; }
- /// The mobility function, which the the number of slots in which
+ /// The mobility function, which is the number of slots in which
/// an instruction may be scheduled.
int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); }
/// The depth, in the dependence graph, for a node.
- int getDepth(SUnit *Node) { return Node->getDepth(); }
+ unsigned getDepth(SUnit *Node) { return Node->getDepth(); }
+
+ /// The maximum unweighted length of a path from an arbitrary node to the
+ /// given node in which each edge has latency 0
+ int getZeroLatencyDepth(SUnit *Node) {
+ return ScheduleInfo[Node->NodeNum].ZeroLatencyDepth;
+ }
/// The height, in the dependence graph, for a node.
- int getHeight(SUnit *Node) { return Node->getHeight(); }
+ unsigned getHeight(SUnit *Node) { return Node->getHeight(); }
+
+ /// The maximum unweighted length of a path from the given node to an
+ /// arbitrary node in which each edge has latency 0
+ int getZeroLatencyHeight(SUnit *Node) {
+ return ScheduleInfo[Node->NodeNum].ZeroLatencyHeight;
+ }
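ZeroLatencyDepth and ZeroLatencyHeight measure the longest chain of zero-latency edges reaching, or leaving, a node, which the scheduler can consult alongside the ordinary depth and height when ordering nodes. A standalone sketch that computes the depth variant for a DAG whose nodes are already numbered in topological order; the edge representation is hypothetical:

#include <algorithm>
#include <cstddef>
#include <vector>

struct Edge {
  unsigned Pred;    // index of the predecessor node
  unsigned Latency; // only latency-0 edges contribute here
};

// PredsOf[N] lists the incoming edges of node N. Nodes must be indexed in
// topological order, i.e. every edge's Pred is smaller than its node.
static std::vector<int>
zeroLatencyDepth(const std::vector<std::vector<Edge>> &PredsOf) {
  std::vector<int> Depth(PredsOf.size(), 0);
  for (std::size_t N = 0; N < PredsOf.size(); ++N)
    for (const Edge &E : PredsOf[N])
      if (E.Latency == 0)
        Depth[N] = std::max(Depth[N], Depth[E.Pred] + 1);
  return Depth;
}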
/// Return true if the dependence is a back-edge in the data dependence graph.
/// Since the DAG doesn't contain cycles, we represent a cycle in the graph
@@ -332,29 +349,7 @@ public:
return Source->getInstr()->isPHI() || Dep.getSUnit()->getInstr()->isPHI();
}
- /// Return true if the dependence is an order dependence between non-Phis.
- static bool isOrder(SUnit *Source, const SDep &Dep) {
- if (Dep.getKind() != SDep::Order)
- return false;
- return (!Source->getInstr()->isPHI() &&
- !Dep.getSUnit()->getInstr()->isPHI());
- }
-
- bool isLoopCarriedOrder(SUnit *Source, const SDep &Dep, bool isSucc = true);
-
- /// The latency of the dependence.
- unsigned getLatency(SUnit *Source, const SDep &Dep) {
- // Anti dependences represent recurrences, so use the latency of the
- // instruction on the back-edge.
- if (Dep.getKind() == SDep::Anti) {
- if (Source->getInstr()->isPHI())
- return Dep.getSUnit()->Latency;
- if (Dep.getSUnit()->getInstr()->isPHI())
- return Source->Latency;
- return Dep.getLatency();
- }
- return Dep.getLatency();
- }
+ bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc = true);
/// The distance function, which indicates that operation V of iteration I
/// depends on operations U of iteration I-distance.
@@ -404,6 +399,7 @@ private:
void addConnectedNodes(SUnit *SU, NodeSet &NewSet,
SetVector<SUnit *> &NodesAdded);
void computeNodeOrder(NodeSetType &NodeSets);
+ void checkValidNodeOrder(const NodeSetType &Circuits) const;
bool schedulePipeline(SMSchedule &Schedule);
void generatePipelinedLoop(SMSchedule &Schedule);
void generateProlog(SMSchedule &Schedule, unsigned LastStage,
@@ -438,7 +434,7 @@ private:
unsigned InstStageNum,
SMSchedule &Schedule);
void updateInstruction(MachineInstr *NewMI, bool LastDef,
- unsigned CurStageNum, unsigned InstStageNum,
+ unsigned CurStageNum, unsigned InstrStageNum,
SMSchedule &Schedule, ValueMapTy *VRMap);
MachineInstr *findDefInLoop(unsigned Reg);
unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal,
@@ -465,15 +461,22 @@ class NodeSet {
bool HasRecurrence = false;
unsigned RecMII = 0;
int MaxMOV = 0;
- int MaxDepth = 0;
+ unsigned MaxDepth = 0;
unsigned Colocate = 0;
SUnit *ExceedPressure = nullptr;
+ unsigned Latency = 0;
public:
using iterator = SetVector<SUnit *>::const_iterator;
NodeSet() = default;
- NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) {}
+ NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) {
+ Latency = 0;
+ for (unsigned i = 0, e = Nodes.size(); i < e; ++i)
+ for (const SDep &Succ : Nodes[i]->Succs)
+ if (Nodes.count(Succ.getSUnit()))
+ Latency += Succ.getLatency();
+ }
bool insert(SUnit *SU) { return Nodes.insert(SU); }
@@ -513,6 +516,10 @@ public:
}
}
+ unsigned getLatency() { return Latency; }
+
+ unsigned getMaxDepth() { return MaxDepth; }
+
void clear() {
Nodes.clear();
RecMII = 0;
@@ -563,7 +570,7 @@ public:
#endif
};
-/// This class repesents the scheduled code. The main data structure is a
+/// This class represents the scheduled code. The main data structure is a
/// map from scheduled cycle to instructions. During scheduling, the
/// data structure explicitly represents all stages/iterations. When
/// the algorithm finishes, the schedule is collapsed into a single stage,
@@ -700,10 +707,10 @@ public:
bool isValidSchedule(SwingSchedulerDAG *SSD);
void finalizeSchedule(SwingSchedulerDAG *SSD);
- bool orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
+ void orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
std::deque<SUnit *> &Insts);
bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi);
- bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Inst,
+ bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Def,
MachineOperand &MO);
void print(raw_ostream &os) const;
void dump() const;
@@ -804,20 +811,41 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
if (!L.getLoopPreheader())
return false;
- // If any of the Phis contain subregs, then we can't pipeline
- // because we don't know how to maintain subreg information in the
- // VMap structure.
- MachineBasicBlock *MBB = L.getHeader();
- for (MachineBasicBlock::iterator BBI = MBB->instr_begin(),
- BBE = MBB->getFirstNonPHI();
- BBI != BBE; ++BBI)
- for (unsigned i = 1; i != BBI->getNumOperands(); i += 2)
- if (BBI->getOperand(i).getSubReg() != 0)
- return false;
-
+ // Remove any subregisters from inputs to phi nodes.
+ preprocessPhiNodes(*L.getHeader());
return true;
}
+void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ SlotIndexes &Slots = *getAnalysis<LiveIntervals>().getSlotIndexes();
+
+ for (MachineInstr &PI : make_range(B.begin(), B.getFirstNonPHI())) {
+ MachineOperand &DefOp = PI.getOperand(0);
+ assert(DefOp.getSubReg() == 0);
+ auto *RC = MRI.getRegClass(DefOp.getReg());
+
+ for (unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {
+ MachineOperand &RegOp = PI.getOperand(i);
+ if (RegOp.getSubReg() == 0)
+ continue;
+
+ // If the operand uses a subregister, replace it with a new register
+ // without subregisters, and generate a copy to the new register.
+ unsigned NewReg = MRI.createVirtualRegister(RC);
+ MachineBasicBlock &PredB = *PI.getOperand(i+1).getMBB();
+ MachineBasicBlock::iterator At = PredB.getFirstTerminator();
+ const DebugLoc &DL = PredB.findDebugLoc(At);
+ auto Copy = BuildMI(PredB, At, DL, TII->get(TargetOpcode::COPY), NewReg)
+ .addReg(RegOp.getReg(), getRegState(RegOp),
+ RegOp.getSubReg());
+ Slots.insertMachineInstrInMaps(*Copy);
+ RegOp.setReg(NewReg);
+ RegOp.setSubReg(0);
+ }
+ }
+}
+
/// The SMS algorithm consists of the following main steps:
/// 1. Computation and analysis of the dependence graph.
/// 2. Ordering of the nodes (instructions).
@@ -858,13 +886,14 @@ void SwingSchedulerDAG::schedule() {
Topo.InitDAGTopologicalSorting();
postprocessDAG();
changeDependences();
- DEBUG({
+ LLVM_DEBUG({
for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
SUnits[su].dumpAll(this);
});
NodeSetType NodeSets;
findCircuits(NodeSets);
+ NodeSetType Circuits = NodeSets;
// Calculate the MII.
unsigned ResMII = calculateResMII();
@@ -877,8 +906,8 @@ void SwingSchedulerDAG::schedule() {
RecMII = 0;
MII = std::max(ResMII, RecMII);
- DEBUG(dbgs() << "MII = " << MII << " (rec=" << RecMII << ", res=" << ResMII
- << ")\n");
+ LLVM_DEBUG(dbgs() << "MII = " << MII << " (rec=" << RecMII
+ << ", res=" << ResMII << ")\n");
// Can't schedule a loop without a valid MII.
if (MII == 0)
@@ -896,20 +925,20 @@ void SwingSchedulerDAG::schedule() {
checkNodeSets(NodeSets);
- DEBUG({
+ LLVM_DEBUG({
for (auto &I : NodeSets) {
dbgs() << " Rec NodeSet ";
I.dump();
}
});
- std::sort(NodeSets.begin(), NodeSets.end(), std::greater<NodeSet>());
+ std::stable_sort(NodeSets.begin(), NodeSets.end(), std::greater<NodeSet>());
groupRemainingNodes(NodeSets);
removeDuplicateNodes(NodeSets);
- DEBUG({
+ LLVM_DEBUG({
for (auto &I : NodeSets) {
dbgs() << " NodeSet ";
I.dump();
@@ -918,6 +947,9 @@ void SwingSchedulerDAG::schedule() {
computeNodeOrder(NodeSets);
+ // Check for node order issues.
+ checkValidNodeOrder(Circuits);
+
SMSchedule Schedule(Pass.MF);
Scheduled = schedulePipeline(Schedule);
@@ -972,7 +1004,7 @@ static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
return 0;
}
-/// Return the Phi register value that comes the the loop block.
+/// Return the Phi register value that comes from the loop block.
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
if (Phi.getOperand(i + 1).getMBB() == LoopBB)
@@ -1022,6 +1054,13 @@ static void getUnderlyingObjects(MachineInstr *MI,
if (!MM->getValue())
return;
GetUnderlyingObjects(const_cast<Value *>(MM->getValue()), Objs, DL);
+ for (Value *V : Objs) {
+ if (!isIdentifiedObject(V)) {
+ Objs.clear();
+ return;
+ }
+ Objs.push_back(V);
+ }
}
/// Add a chain edge between a load and store if the store can be an
@@ -1030,6 +1069,8 @@ static void getUnderlyingObjects(MachineInstr *MI,
/// but that code doesn't create loop carried dependences.
void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
MapVector<Value *, SmallVector<SUnit *, 4>> PendingLoads;
+ Value *UnknownValue =
+ UndefValue::get(Type::getVoidTy(MF.getFunction().getContext()));
for (auto &SU : SUnits) {
MachineInstr &MI = *SU.getInstr();
if (isDependenceBarrier(MI, AA))
@@ -1037,6 +1078,8 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
else if (MI.mayLoad()) {
SmallVector<Value *, 4> Objs;
getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
+ if (Objs.empty())
+ Objs.push_back(UnknownValue);
for (auto V : Objs) {
SmallVector<SUnit *, 4> &SUs = PendingLoads[V];
SUs.push_back(&SU);
@@ -1044,6 +1087,8 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
} else if (MI.mayStore()) {
SmallVector<Value *, 4> Objs;
getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
+ if (Objs.empty())
+ Objs.push_back(UnknownValue);
for (auto V : Objs) {
MapVector<Value *, SmallVector<SUnit *, 4>>::iterator I =
PendingLoads.find(V);
@@ -1058,33 +1103,39 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
// offset, then mark the dependence as loop carried potentially.
unsigned BaseReg1, BaseReg2;
int64_t Offset1, Offset2;
- if (!TII->getMemOpBaseRegImmOfs(LdMI, BaseReg1, Offset1, TRI) ||
- !TII->getMemOpBaseRegImmOfs(MI, BaseReg2, Offset2, TRI)) {
- SU.addPred(SDep(Load, SDep::Barrier));
- continue;
- }
- if (BaseReg1 == BaseReg2 && (int)Offset1 < (int)Offset2) {
- assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) &&
- "What happened to the chain edge?");
- SU.addPred(SDep(Load, SDep::Barrier));
- continue;
+ if (TII->getMemOpBaseRegImmOfs(LdMI, BaseReg1, Offset1, TRI) &&
+ TII->getMemOpBaseRegImmOfs(MI, BaseReg2, Offset2, TRI)) {
+ if (BaseReg1 == BaseReg2 && (int)Offset1 < (int)Offset2) {
+ assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) &&
+ "What happened to the chain edge?");
+ SDep Dep(Load, SDep::Barrier);
+ Dep.setLatency(1);
+ SU.addPred(Dep);
+ continue;
+ }
}
// Second, the more expensive check that uses alias analysis on the
// base registers. If they alias, and the load offset is less than
// the store offset, the mark the dependence as loop carried.
if (!AA) {
- SU.addPred(SDep(Load, SDep::Barrier));
+ SDep Dep(Load, SDep::Barrier);
+ Dep.setLatency(1);
+ SU.addPred(Dep);
continue;
}
MachineMemOperand *MMO1 = *LdMI.memoperands_begin();
MachineMemOperand *MMO2 = *MI.memoperands_begin();
if (!MMO1->getValue() || !MMO2->getValue()) {
- SU.addPred(SDep(Load, SDep::Barrier));
+ SDep Dep(Load, SDep::Barrier);
+ Dep.setLatency(1);
+ SU.addPred(Dep);
continue;
}
if (MMO1->getValue() == MMO2->getValue() &&
MMO1->getOffset() <= MMO2->getOffset()) {
- SU.addPred(SDep(Load, SDep::Barrier));
+ SDep Dep(Load, SDep::Barrier);
+ Dep.setLatency(1);
+ SU.addPred(Dep);
continue;
}
AliasResult AAResult = AA->alias(
@@ -1093,8 +1144,11 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
MemoryLocation(MMO2->getValue(), MemoryLocation::UnknownSize,
MMO2->getAAInfo()));
- if (AAResult != NoAlias)
- SU.addPred(SDep(Load, SDep::Barrier));
+ if (AAResult != NoAlias) {
+ SDep Dep(Load, SDep::Barrier);
+ Dep.setLatency(1);
+ SU.addPred(Dep);
+ }
}
}
}
@@ -1136,6 +1190,7 @@ void SwingSchedulerDAG::updatePhiDependences() {
if (SU != nullptr && UseMI->isPHI()) {
if (!MI->isPHI()) {
SDep Dep(SU, SDep::Anti, Reg);
+ Dep.setLatency(1);
I.addPred(Dep);
} else {
HasPhiDef = Reg;
@@ -1382,7 +1437,7 @@ unsigned SwingSchedulerDAG::calculateResMII() {
/// Iterate over each circuit. Compute the delay(c) and distance(c)
/// for each circuit. The II needs to satisfy the inequality
/// delay(c) - II*distance(c) <= 0. For each circuit, choose the smallest
-/// II that satistifies the inequality, and the RecMII is the maximum
+/// II that satisfies the inequality, and the RecMII is the maximum
/// of those values.
unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
unsigned RecMII = 0;
@@ -1391,7 +1446,7 @@ unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
if (Nodes.empty())
continue;
- unsigned Delay = Nodes.size() - 1;
+ unsigned Delay = Nodes.getLatency();
unsigned Distance = 1;
// ii = ceil(delay / distance)
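
A minimal standalone sketch of this computation, with made-up circuit latencies rather than real NodeSets (the names recMIISketch and CircuitLatencies are illustrative, not from the patch): each recurrence c contributes ceil(delay(c) / distance(c)), where delay(c) is now the summed latency of the edges inside the recurrence (the new NodeSet::getLatency()) instead of the old node-count minus one, and distance(c) stays 1.

#include <algorithm>
#include <vector>

// Illustrative only: each circuit's delay is the summed latency of its
// internal edges; the distance of a simple recurrence is one iteration.
static unsigned recMIISketch(const std::vector<unsigned> &CircuitLatencies) {
  unsigned RecMII = 0;
  for (unsigned Delay : CircuitLatencies) {
    unsigned Distance = 1;
    unsigned CurMII = (Delay + Distance - 1) / Distance; // ceil(delay/distance)
    RecMII = std::max(RecMII, CurMII);
  }
  return RecMII;
}
// A 3-node recurrence whose internal edges have latencies 2 + 1 + 2 = 5 now
// yields RecMII = 5, where the old size-based computation gave 3 - 1 = 2.
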
@@ -1437,10 +1492,23 @@ static void swapAntiDependences(std::vector<SUnit> &SUnits) {
void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
SwingSchedulerDAG *DAG) {
BitVector Added(SUnits.size());
+ DenseMap<int, int> OutputDeps;
for (int i = 0, e = SUnits.size(); i != e; ++i) {
Added.reset();
// Add any successor to the adjacency matrix and exclude duplicates.
for (auto &SI : SUnits[i].Succs) {
+ // Only create a back-edge on the first and last nodes of a dependence
+ // chain. This records any chains and adds them later.
+ if (SI.getKind() == SDep::Output) {
+ int N = SI.getSUnit()->NodeNum;
+ int BackEdge = i;
+ auto Dep = OutputDeps.find(BackEdge);
+ if (Dep != OutputDeps.end()) {
+ BackEdge = Dep->second;
+ OutputDeps.erase(Dep);
+ }
+ OutputDeps[N] = BackEdge;
+ }
// Do not process a boundary node; a back-edge is processed only
// if it goes to a Phi.
if (SI.getSUnit()->isBoundaryNode() ||
@@ -1456,7 +1524,7 @@ void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
// adjacency matrix.
for (auto &PI : SUnits[i].Preds) {
if (!SUnits[i].getInstr()->mayStore() ||
- !DAG->isLoopCarriedOrder(&SUnits[i], PI, false))
+ !DAG->isLoopCarriedDep(&SUnits[i], PI, false))
continue;
if (PI.getKind() == SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) {
int N = PI.getSUnit()->NodeNum;
@@ -1467,6 +1535,12 @@ void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
}
}
}
+ // Add back-edges in the adjacency matrix for the output dependences.
+ for (auto &OD : OutputDeps)
+ if (!Added.test(OD.second)) {
+ AdjK[OD.first].push_back(OD.second);
+ Added.set(OD.second);
+ }
}
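
For illustration: if three instructions redefine the same register in sequence, so the output dependences form a chain A -> B -> C, the OutputDeps map above records the head of the chain as each new tail is seen, and the loop at the end adds a single back-edge from C to A rather than one back-edge per link, keeping the circuit search from walking the interior of the chain.
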
/// Identify an elementary circuit in the dependence graph starting at the
@@ -1543,7 +1617,7 @@ void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
}
/// Return true for DAG nodes that we ignore when computing the cost functions.
-/// We ignore the back-edge recurrence in order to avoid unbounded recurison
+/// We ignore the back-edge recurrence in order to avoid unbounded recursion
/// in the calculation of the ASAP, ALAP, etc functions.
static bool ignoreDependence(const SDep &D, bool isPred) {
if (D.isArtificial())
@@ -1560,7 +1634,7 @@ static bool ignoreDependence(const SDep &D, bool isPred) {
void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
ScheduleInfo.resize(SUnits.size());
- DEBUG({
+ LLVM_DEBUG({
for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
E = Topo.end();
I != E; ++I) {
@@ -1570,49 +1644,59 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
});
int maxASAP = 0;
- // Compute ASAP.
+ // Compute ASAP and ZeroLatencyDepth.
for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
E = Topo.end();
I != E; ++I) {
int asap = 0;
+ int zeroLatencyDepth = 0;
SUnit *SU = &SUnits[*I];
for (SUnit::const_pred_iterator IP = SU->Preds.begin(),
EP = SU->Preds.end();
IP != EP; ++IP) {
+ SUnit *pred = IP->getSUnit();
+ if (IP->getLatency() == 0)
+ zeroLatencyDepth =
+ std::max(zeroLatencyDepth, getZeroLatencyDepth(pred) + 1);
if (ignoreDependence(*IP, true))
continue;
- SUnit *pred = IP->getSUnit();
- asap = std::max(asap, (int)(getASAP(pred) + getLatency(SU, *IP) -
+ asap = std::max(asap, (int)(getASAP(pred) + IP->getLatency() -
getDistance(pred, SU, *IP) * MII));
}
maxASAP = std::max(maxASAP, asap);
ScheduleInfo[*I].ASAP = asap;
+ ScheduleInfo[*I].ZeroLatencyDepth = zeroLatencyDepth;
}
- // Compute ALAP and MOV.
+ // Compute ALAP, ZeroLatencyHeight, and MOV.
for (ScheduleDAGTopologicalSort::const_reverse_iterator I = Topo.rbegin(),
E = Topo.rend();
I != E; ++I) {
int alap = maxASAP;
+ int zeroLatencyHeight = 0;
SUnit *SU = &SUnits[*I];
for (SUnit::const_succ_iterator IS = SU->Succs.begin(),
ES = SU->Succs.end();
IS != ES; ++IS) {
+ SUnit *succ = IS->getSUnit();
+ if (IS->getLatency() == 0)
+ zeroLatencyHeight =
+ std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1);
if (ignoreDependence(*IS, true))
continue;
- SUnit *succ = IS->getSUnit();
- alap = std::min(alap, (int)(getALAP(succ) - getLatency(SU, *IS) +
+ alap = std::min(alap, (int)(getALAP(succ) - IS->getLatency() +
getDistance(SU, succ, *IS) * MII));
}
ScheduleInfo[*I].ALAP = alap;
+ ScheduleInfo[*I].ZeroLatencyHeight = zeroLatencyHeight;
}
// After computing the node functions, compute the summary for each node set.
for (NodeSet &I : NodeSets)
I.computeNodeSetInfo(this);
- DEBUG({
+ LLVM_DEBUG({
for (unsigned i = 0; i < SUnits.size(); i++) {
dbgs() << "\tNode " << i << ":\n";
dbgs() << "\t ASAP = " << getASAP(&SUnits[i]) << "\n";
@@ -1620,6 +1704,8 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
dbgs() << "\t MOV = " << getMOV(&SUnits[i]) << "\n";
dbgs() << "\t D = " << getDepth(&SUnits[i]) << "\n";
dbgs() << "\t H = " << getHeight(&SUnits[i]) << "\n";
+ dbgs() << "\t ZLD = " << getZeroLatencyDepth(&SUnits[i]) << "\n";
+ dbgs() << "\t ZLH = " << getZeroLatencyHeight(&SUnits[i]) << "\n";
}
});
}
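
A self-contained toy of the two recurrences above, with made-up nodes and latencies; it deliberately omits the loop-carried distance * MII term and the ignoreDependence filtering that the real computation applies.

#include <algorithm>
#include <cstdio>
#include <vector>

// Nodes are numbered in topological order; each edge carries a latency.
// ASAP(v) = max over preds u of (ASAP(u) + latency(u,v)), while the new
// ZeroLatencyDepth counts only chains of latency-0 edges, which the
// node-ordering heuristic later uses as a tie-breaker.
struct Edge { int Src, Dst, Latency; };

int main() {
  const int NumNodes = 4;
  std::vector<Edge> Edges = {{0, 1, 2}, {1, 2, 0}, {2, 3, 0}};
  std::vector<int> ASAP(NumNodes, 0), ZLD(NumNodes, 0);
  for (const Edge &E : Edges) {            // edges listed in topological order
    ASAP[E.Dst] = std::max(ASAP[E.Dst], ASAP[E.Src] + E.Latency);
    if (E.Latency == 0)
      ZLD[E.Dst] = std::max(ZLD[E.Dst], ZLD[E.Src] + 1);
  }
  for (int N = 0; N < NumNodes; ++N)
    std::printf("node %d: ASAP=%d ZeroLatencyDepth=%d\n", N, ASAP[N], ZLD[N]);
  // Prints ASAP = 0, 2, 2, 2 and ZeroLatencyDepth = 0, 0, 1, 2: node 3 ends a
  // two-edge zero-latency chain even though its ASAP equals node 1's.
}
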
@@ -1778,7 +1864,8 @@ void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
RecRPTracker.closeBottom();
std::vector<SUnit *> SUnits(NS.begin(), NS.end());
- std::sort(SUnits.begin(), SUnits.end(), [](const SUnit *A, const SUnit *B) {
+ llvm::sort(SUnits.begin(), SUnits.end(),
+ [](const SUnit *A, const SUnit *B) {
return A->NodeNum > B->NodeNum;
});
@@ -1796,9 +1883,10 @@ void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
CriticalPSets,
RecRegPressure.MaxSetPressure);
if (RPDelta.Excess.isValid()) {
- DEBUG(dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "
- << TRI->getRegPressureSetName(RPDelta.Excess.getPSet())
- << ":" << RPDelta.Excess.getUnitInc());
+ LLVM_DEBUG(
+ dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "
+ << TRI->getRegPressureSetName(RPDelta.Excess.getPSet())
+ << ":" << RPDelta.Excess.getUnitInc());
NS.setExceedPressure(SU);
break;
}
@@ -1834,25 +1922,23 @@ void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
/// Check if the existing node-sets are profitable. If not, then ignore the
/// recurrent node-sets, and attempt to schedule all nodes together. This is
-/// a heuristic. If the MII is large and there is a non-recurrent node with
-/// a large depth compared to the MII, then it's best to try and schedule
-/// all instruction together instead of starting with the recurrent node-sets.
+/// a heuristic. If the MII is large and all the recurrent node-sets are small,
+/// then it's best to try to schedule all instructions together instead of
+/// starting with the recurrent node-sets.
void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
// Look for loops with a large MII.
- if (MII <= 20)
+ if (MII < 17)
return;
// Check if the node-set contains only a simple add recurrence.
- for (auto &NS : NodeSets)
- if (NS.size() > 2)
+ for (auto &NS : NodeSets) {
+ if (NS.getRecMII() > 2)
return;
- // If the depth of any instruction is significantly larger than the MII, then
- // ignore the recurrent node-sets and treat all instructions equally.
- for (auto &SU : SUnits)
- if (SU.getDepth() > MII * 1.5) {
- NodeSets.clear();
- DEBUG(dbgs() << "Clear recurrence node-sets\n");
+ if (NS.getMaxDepth() > MII)
return;
- }
+ }
+ NodeSets.clear();
+ LLVM_DEBUG(dbgs() << "Clear recurrence node-sets\n");
+ return;
}
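
The rewritten heuristic, restated as a single predicate over made-up per-node-set summaries (RecInfo and shouldDropRecurrences are illustrative names, not from the patch): the recurrent node-sets are ignored only when the MII is already large and every recurrence is both trivial (RecMII <= 2) and shallow (MaxDepth <= MII).

#include <algorithm>
#include <vector>

struct RecInfo { unsigned RecMII; unsigned MaxDepth; }; // per-NodeSet summary

static bool shouldDropRecurrences(unsigned MII, const std::vector<RecInfo> &Sets) {
  if (MII < 17)
    return false;
  return std::all_of(Sets.begin(), Sets.end(), [MII](const RecInfo &NS) {
    return NS.RecMII <= 2 && NS.MaxDepth <= MII;
  });
}
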
/// Add the nodes that do not belong to a recurrence set into groups
@@ -1907,7 +1993,7 @@ void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
if (!NewSet.empty())
NodeSets.push_back(NewSet);
- // Create new nodes sets with the connected nodes any any remaining node that
+ // Create new node sets with the connected nodes and any remaining node that
// has no predecessor.
for (unsigned i = 0; i < SUnits.size(); ++i) {
SUnit *SU = &SUnits[i];
@@ -1988,14 +2074,6 @@ void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
}
}
-/// Return true if Inst1 defines a value that is used in Inst2.
-static bool hasDataDependence(SUnit *Inst1, SUnit *Inst2) {
- for (auto &SI : Inst1->Succs)
- if (SI.getSUnit() == Inst2 && SI.getKind() == SDep::Data)
- return true;
- return false;
-}
-
/// Compute an ordered list of the dependence graph nodes, which
/// indicates the order that the nodes will be scheduled. This is a
/// two-level algorithm. First, a partial order is created, which
@@ -2005,59 +2083,62 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
NodeOrder.clear();
for (auto &Nodes : NodeSets) {
- DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n");
+ LLVM_DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n");
OrderKind Order;
SmallSetVector<SUnit *, 8> N;
if (pred_L(NodeOrder, N) && isSubset(N, Nodes)) {
R.insert(N.begin(), N.end());
Order = BottomUp;
- DEBUG(dbgs() << " Bottom up (preds) ");
+ LLVM_DEBUG(dbgs() << " Bottom up (preds) ");
} else if (succ_L(NodeOrder, N) && isSubset(N, Nodes)) {
R.insert(N.begin(), N.end());
Order = TopDown;
- DEBUG(dbgs() << " Top down (succs) ");
+ LLVM_DEBUG(dbgs() << " Top down (succs) ");
} else if (isIntersect(N, Nodes, R)) {
// If some of the successors are in the existing node-set, then use the
// top-down ordering.
Order = TopDown;
- DEBUG(dbgs() << " Top down (intersect) ");
+ LLVM_DEBUG(dbgs() << " Top down (intersect) ");
} else if (NodeSets.size() == 1) {
for (auto &N : Nodes)
if (N->Succs.size() == 0)
R.insert(N);
Order = BottomUp;
- DEBUG(dbgs() << " Bottom up (all) ");
+ LLVM_DEBUG(dbgs() << " Bottom up (all) ");
} else {
// Find the node with the highest ASAP.
SUnit *maxASAP = nullptr;
for (SUnit *SU : Nodes) {
- if (maxASAP == nullptr || getASAP(SU) >= getASAP(maxASAP))
+ if (maxASAP == nullptr || getASAP(SU) > getASAP(maxASAP) ||
+ (getASAP(SU) == getASAP(maxASAP) && SU->NodeNum > maxASAP->NodeNum))
maxASAP = SU;
}
R.insert(maxASAP);
Order = BottomUp;
- DEBUG(dbgs() << " Bottom up (default) ");
+ LLVM_DEBUG(dbgs() << " Bottom up (default) ");
}
while (!R.empty()) {
if (Order == TopDown) {
// Choose the node with the maximum height. If more than one, choose
- // the node with the lowest MOV. If still more than one, check if there
- // is a dependence between the instructions.
+ // the node with the maximum ZeroLatencyHeight. If still more than one,
+ // choose the node with the lowest MOV.
while (!R.empty()) {
SUnit *maxHeight = nullptr;
for (SUnit *I : R) {
if (maxHeight == nullptr || getHeight(I) > getHeight(maxHeight))
maxHeight = I;
else if (getHeight(I) == getHeight(maxHeight) &&
- getMOV(I) < getMOV(maxHeight) &&
- !hasDataDependence(maxHeight, I))
+ getZeroLatencyHeight(I) > getZeroLatencyHeight(maxHeight))
maxHeight = I;
- else if (hasDataDependence(I, maxHeight))
+ else if (getHeight(I) == getHeight(maxHeight) &&
+ getZeroLatencyHeight(I) ==
+ getZeroLatencyHeight(maxHeight) &&
+ getMOV(I) < getMOV(maxHeight))
maxHeight = I;
}
NodeOrder.insert(maxHeight);
- DEBUG(dbgs() << maxHeight->NodeNum << " ");
+ LLVM_DEBUG(dbgs() << maxHeight->NodeNum << " ");
R.remove(maxHeight);
for (const auto &I : maxHeight->Succs) {
if (Nodes.count(I.getSUnit()) == 0)
@@ -2080,28 +2161,29 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
}
}
Order = BottomUp;
- DEBUG(dbgs() << "\n Switching order to bottom up ");
+ LLVM_DEBUG(dbgs() << "\n Switching order to bottom up ");
SmallSetVector<SUnit *, 8> N;
if (pred_L(NodeOrder, N, &Nodes))
R.insert(N.begin(), N.end());
} else {
// Choose the node with the maximum depth. If more than one, choose
- // the node with the lowest MOV. If there is still more than one, check
- // for a dependence between the instructions.
+ // the node with the maximum ZeroLatencyDepth. If still more than one,
+ // choose the node with the lowest MOV.
while (!R.empty()) {
SUnit *maxDepth = nullptr;
for (SUnit *I : R) {
if (maxDepth == nullptr || getDepth(I) > getDepth(maxDepth))
maxDepth = I;
else if (getDepth(I) == getDepth(maxDepth) &&
- getMOV(I) < getMOV(maxDepth) &&
- !hasDataDependence(I, maxDepth))
+ getZeroLatencyDepth(I) > getZeroLatencyDepth(maxDepth))
maxDepth = I;
- else if (hasDataDependence(maxDepth, I))
+ else if (getDepth(I) == getDepth(maxDepth) &&
+ getZeroLatencyDepth(I) == getZeroLatencyDepth(maxDepth) &&
+ getMOV(I) < getMOV(maxDepth))
maxDepth = I;
}
NodeOrder.insert(maxDepth);
- DEBUG(dbgs() << maxDepth->NodeNum << " ");
+ LLVM_DEBUG(dbgs() << maxDepth->NodeNum << " ");
R.remove(maxDepth);
if (Nodes.isExceedSU(maxDepth)) {
Order = TopDown;
@@ -2114,8 +2196,6 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
continue;
if (NodeOrder.count(I.getSUnit()) != 0)
continue;
- if (I.getKind() == SDep::Anti)
- continue;
R.insert(I.getSUnit());
}
// Back-edges are predecessors with an anti-dependence.
@@ -2130,16 +2210,16 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
}
}
Order = TopDown;
- DEBUG(dbgs() << "\n Switching order to top down ");
+ LLVM_DEBUG(dbgs() << "\n Switching order to top down ");
SmallSetVector<SUnit *, 8> N;
if (succ_L(NodeOrder, N, &Nodes))
R.insert(N.begin(), N.end());
}
}
- DEBUG(dbgs() << "\nDone with Nodeset\n");
+ LLVM_DEBUG(dbgs() << "\nDone with Nodeset\n");
}
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "Node order: ";
for (SUnit *I : NodeOrder)
dbgs() << " " << I->NodeNum << " ";
@@ -2158,7 +2238,7 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
for (unsigned II = MII; II < MII + 10 && !scheduleFound; ++II) {
Schedule.reset();
Schedule.setInitiationInterval(II);
- DEBUG(dbgs() << "Try to schedule with " << II << "\n");
+ LLVM_DEBUG(dbgs() << "Try to schedule with " << II << "\n");
SetVector<SUnit *>::iterator NI = NodeOrder.begin();
SetVector<SUnit *>::iterator NE = NodeOrder.end();
@@ -2175,12 +2255,12 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
int SchedStart = INT_MIN;
Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
II, this);
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "Inst (" << SU->NodeNum << ") ";
SU->getInstr()->dump();
dbgs() << "\n";
});
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "\tes: " << EarlyStart << " ls: " << LateStart
<< " me: " << SchedEnd << " ms: " << SchedStart << "\n";
});
@@ -2216,7 +2296,7 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
Schedule.getMaxStageCount() > (unsigned)SwpMaxStages)
scheduleFound = false;
- DEBUG({
+ LLVM_DEBUG({
if (!scheduleFound)
dbgs() << "\tCan't schedule\n";
});
@@ -2227,7 +2307,7 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
scheduleFound = Schedule.isValidSchedule(this);
}
- DEBUG(dbgs() << "Schedule Found? " << scheduleFound << "\n");
+ LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound << "\n");
if (scheduleFound)
Schedule.finalizeSchedule(this);
@@ -2250,7 +2330,7 @@ void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) {
// Remember the registers that are used in different stages. The index is
// the iteration, or stage, that the instruction is scheduled in. This is
- // a map between register names in the orignal block and the names created
+ // a map between register names in the original block and the names created
// in each stage of the pipelined loop.
ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
InstrMapTy InstrMap;
@@ -2297,7 +2377,7 @@ void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) {
generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule, VRMap,
InstrMap, MaxStageCount, MaxStageCount, false);
- DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
+ LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
SmallVector<MachineBasicBlock *, 4> EpilogBBs;
// Generate the epilog instructions to complete the pipeline.
@@ -2315,6 +2395,8 @@ void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) {
addBranches(PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap);
// Remove the original loop since it's no longer referenced.
+ for (auto &I : *BB)
+ LIS.RemoveMachineInstrFromMaps(I);
BB->clear();
BB->eraseFromParent();
@@ -2364,7 +2446,7 @@ void SwingSchedulerDAG::generateProlog(SMSchedule &Schedule, unsigned LastStage,
}
}
rewritePhiValues(NewBB, i, Schedule, VRMap, InstrMap);
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "prolog:\n";
NewBB->dump();
});
@@ -2431,7 +2513,9 @@ void SwingSchedulerDAG::generateEpilog(SMSchedule &Schedule, unsigned LastStage,
continue;
MachineInstr *In = &BBI;
if (Schedule.isScheduledAtStage(getSUnit(In), StageNum)) {
- MachineInstr *NewMI = cloneInstr(In, EpilogStage - LastStage, 0);
+ // Instructions with memoperands in the epilog are updated with
+ // conservative values.
+ MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
updateInstruction(NewMI, i == 1, EpilogStage, 0, Schedule, VRMap);
NewBB->push_back(NewMI);
InstrMap[NewMI] = In;
@@ -2444,7 +2528,7 @@ void SwingSchedulerDAG::generateEpilog(SMSchedule &Schedule, unsigned LastStage,
InstrMap, LastStage, EpilogStage, i == 1);
PredBB = NewBB;
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "epilog:\n";
NewBB->dump();
});
@@ -2550,24 +2634,20 @@ void SwingSchedulerDAG::generateExistingPhis(
// of the Phi value.
unsigned NewReg = VRMap[PrevStage][LoopVal];
rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, 0, &*BBI,
- Def, NewReg);
+ Def, InitVal, NewReg);
if (VRMap[CurStageNum].count(LoopVal))
VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
}
// Adjust the number of Phis needed depending on the number of prologs left,
- // and the distance from where the Phi is first scheduled.
- unsigned NumPhis = NumStages;
- if (!InKernel && (int)PrologStage < LoopValStage)
- // The NumPhis is the maximum number of new Phis needed during the steady
- // state. If the Phi has not been scheduled in current prolog, then we
- // need to generate less Phis.
- NumPhis = std::max((int)NumPhis - (int)(LoopValStage - PrologStage), 1);
- // The number of Phis cannot exceed the number of prolog stages. Each
- // stage can potentially define two values.
- NumPhis = std::min(NumPhis, PrologStage + 2);
+ // and the distance from where the Phi is first scheduled. The number of
+ // Phis cannot exceed the number of prolog stages. Each stage can
+ // potentially define two values.
+ unsigned MaxPhis = PrologStage + 2;
+ if (!InKernel && (int)PrologStage <= LoopValStage)
+ MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
+ unsigned NumPhis = std::min(NumStages, MaxPhis);
unsigned NewReg = 0;
-
unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
// In the epilog, we may need to look back one stage to get the correct
// Phi name because the epilog and prolog blocks execute the same stage.
@@ -2659,19 +2739,20 @@ void SwingSchedulerDAG::generateExistingPhis(
// references another Phi, and the other Phi is scheduled in an
// earlier stage. We can try to reuse an existing Phi up until the last
// stage of the current Phi.
- if (LoopDefIsPhi && (int)PrologStage >= StageScheduled) {
+ if (LoopDefIsPhi && (int)(PrologStage - np) >= StageScheduled) {
int LVNumStages = Schedule.getStagesForPhi(LoopVal);
int StageDiff = (StageScheduled - LoopValStage);
LVNumStages -= StageDiff;
- if (LVNumStages > (int)np) {
+ // Make sure the loop value Phi has been processed already.
+ if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
NewReg = PhiOp2;
unsigned ReuseStage = CurStageNum;
if (Schedule.isLoopCarried(this, *PhiInst))
ReuseStage -= LVNumStages;
// Check if the Phi to reuse has been generated yet. If not, then
// there is nothing to reuse.
- if (VRMap[ReuseStage].count(LoopVal)) {
- NewReg = VRMap[ReuseStage][LoopVal];
+ if (VRMap[ReuseStage - np].count(LoopVal)) {
+ NewReg = VRMap[ReuseStage - np][LoopVal];
rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
&*BBI, Def, NewReg);
@@ -2744,7 +2825,7 @@ void SwingSchedulerDAG::generateExistingPhis(
/// Generate Phis for the specified block in the generated pipelined code.
/// These are new Phis needed because the definition is scheduled after the
-/// use in the pipelened sequence.
+/// use in the pipelined sequence.
void SwingSchedulerDAG::generatePhis(
MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
@@ -2874,6 +2955,13 @@ void SwingSchedulerDAG::removeDeadInstructions(MachineBasicBlock *KernelBB,
if (!MOI->isReg() || !MOI->isDef())
continue;
unsigned reg = MOI->getReg();
+ // Assume physical registers are used, unless they are marked dead.
+ if (TargetRegisterInfo::isPhysicalRegister(reg)) {
+ used = !MOI->isDead();
+ if (used)
+ break;
+ continue;
+ }
unsigned realUses = 0;
for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg),
EI = MRI.use_end();
@@ -2891,6 +2979,7 @@ void SwingSchedulerDAG::removeDeadInstructions(MachineBasicBlock *KernelBB,
used = false;
}
if (!used) {
+ LIS.RemoveMachineInstrFromMaps(*MI);
MI++->eraseFromParent();
continue;
}
@@ -2905,6 +2994,7 @@ void SwingSchedulerDAG::removeDeadInstructions(MachineBasicBlock *KernelBB,
++BBI;
unsigned reg = MI->getOperand(0).getReg();
if (MRI.use_begin(reg) == MRI.use_end()) {
+ LIS.RemoveMachineInstrFromMaps(*MI);
MI->eraseFromParent();
}
}
@@ -2924,10 +3014,8 @@ void SwingSchedulerDAG::splitLifetimes(MachineBasicBlock *KernelBB,
MBBVectorTy &EpilogBBs,
SMSchedule &Schedule) {
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(),
- BBF = KernelBB->getFirstNonPHI();
- BBI != BBF; ++BBI) {
- unsigned Def = BBI->getOperand(0).getReg();
+ for (auto &PHI : KernelBB->phis()) {
+ unsigned Def = PHI.getOperand(0).getReg();
// Check for any Phi definition that is used as an operand of another Phi
// in the same block.
for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def),
@@ -2935,7 +3023,7 @@ void SwingSchedulerDAG::splitLifetimes(MachineBasicBlock *KernelBB,
I != E; ++I) {
if (I->isPHI() && I->getParent() == KernelBB) {
// Get the loop carried definition.
- unsigned LCDef = getLoopPhiReg(*BBI, KernelBB);
+ unsigned LCDef = getLoopPhiReg(PHI, KernelBB);
if (!LCDef)
continue;
MachineInstr *MI = MRI.getVRegDef(LCDef);
@@ -3099,12 +3187,14 @@ void SwingSchedulerDAG::updateMemOperands(MachineInstr &NewMI,
continue;
}
unsigned Delta;
- if (computeDelta(OldMI, Delta)) {
+ if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
int64_t AdjOffset = Delta * Num;
NewMemRefs[Refs++] =
MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize());
- } else
- NewMemRefs[Refs++] = MF.getMachineMemOperand(MMO, 0, UINT64_MAX);
+ } else {
+ NewMI.dropMemRefs();
+ return;
+ }
}
NewMI.setMemRefs(NewMemRefs, NewMemRefs + NumRefs);
}
@@ -3249,13 +3339,11 @@ void SwingSchedulerDAG::rewritePhiValues(MachineBasicBlock *NewBB,
SMSchedule &Schedule,
ValueMapTy *VRMap,
InstrMapTy &InstrMap) {
- for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
- BBE = BB->getFirstNonPHI();
- BBI != BBE; ++BBI) {
+ for (auto &PHI : BB->phis()) {
unsigned InitVal = 0;
unsigned LoopVal = 0;
- getPhiRegs(*BBI, BB, InitVal, LoopVal);
- unsigned PhiDef = BBI->getOperand(0).getReg();
+ getPhiRegs(PHI, BB, InitVal, LoopVal);
+ unsigned PhiDef = PHI.getOperand(0).getReg();
unsigned PhiStage =
(unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(PhiDef)));
@@ -3269,7 +3357,7 @@ void SwingSchedulerDAG::rewritePhiValues(MachineBasicBlock *NewBB,
getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
if (!NewVal)
NewVal = InitVal;
- rewriteScheduledInstr(NewBB, Schedule, InstrMap, StageNum - np, np, &*BBI,
+ rewriteScheduledInstr(NewBB, Schedule, InstrMap, StageNum - np, np, &PHI,
PhiDef, NewVal);
}
}
@@ -3375,10 +3463,15 @@ bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
if (!TII->getBaseAndOffsetPosition(*PrevDef, BasePos1, OffsetPos1))
return false;
- // Make sure offset values are both positive or both negative.
+ // Make sure that the instructions do not access the same memory location in
+ // the next iteration.
int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm();
int64_t StoreOffset = PrevDef->getOperand(OffsetPos1).getImm();
- if ((LoadOffset >= 0) != (StoreOffset >= 0))
+ MachineInstr *NewMI = MF.CloneMachineInstr(MI);
+ NewMI->getOperand(OffsetPosLd).setImm(LoadOffset + StoreOffset);
+ bool Disjoint = TII->areMemAccessesTriviallyDisjoint(*NewMI, *PrevDef);
+ MF.DeleteMachineInstr(NewMI);
+ if (!Disjoint)
return false;
// Set the return value once we determine that we return true.
@@ -3425,17 +3518,21 @@ void SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
}
}
-/// Return true for an order dependence that is loop carried potentially.
-/// An order dependence is loop carried if the destination defines a value
-/// that may be used by the source in a subsequent iteration.
-bool SwingSchedulerDAG::isLoopCarriedOrder(SUnit *Source, const SDep &Dep,
- bool isSucc) {
- if (!isOrder(Source, Dep) || Dep.isArtificial())
+/// Return true for an order or output dependence that is loop carried
+/// potentially. A dependence is loop carried if the destination defines a value
+/// that may be used or defined by the source in a subsequent iteration.
+bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
+ bool isSucc) {
+ if ((Dep.getKind() != SDep::Order && Dep.getKind() != SDep::Output) ||
+ Dep.isArtificial())
return false;
if (!SwpPruneLoopCarried)
return true;
+ if (Dep.getKind() == SDep::Output)
+ return true;
+
MachineInstr *SI = Source->getInstr();
MachineInstr *DI = Dep.getSUnit()->getInstr();
if (!isSucc)
@@ -3465,6 +3562,19 @@ bool SwingSchedulerDAG::isLoopCarriedOrder(SUnit *Source, const SDep &Dep,
if (BaseRegS != BaseRegD)
return true;
+ // Check that the base register is incremented by a constant value for each
+ // iteration.
+ MachineInstr *Def = MRI.getVRegDef(BaseRegS);
+ if (!Def || !Def->isPHI())
+ return true;
+ unsigned InitVal = 0;
+ unsigned LoopVal = 0;
+ getPhiRegs(*Def, BB, InitVal, LoopVal);
+ MachineInstr *LoopDef = MRI.getVRegDef(LoopVal);
+ int D = 0;
+ if (!LoopDef || !TII->getIncrementValue(*LoopDef, D))
+ return true;
+
uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize();
uint64_t AccessSizeD = (*DI->memoperands_begin())->getSize();
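
A rough sketch of the offset reasoning this block is building toward, assuming a constant per-iteration increment D on the shared base register; this simplifies the real check, which also folds in the access sizes gathered just above and the direction of the dependence.

#include <cstdint>

// Illustrative only: the next iteration's access lands D bytes further along
// the base register, so it can conflict with the other access only if the two
// byte intervals intersect.
static bool mayBeLoopCarried(int64_t OffA, uint64_t SizeA, int64_t OffB,
                             uint64_t SizeB, int64_t D) {
  int64_t NextA = OffA + D; // access A, one iteration later
  return NextA < OffB + (int64_t)SizeB && OffB < NextA + (int64_t)SizeA;
}
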
@@ -3516,7 +3626,7 @@ bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
}
if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
Resources->canReserveResources(*SU->getInstr())) {
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "\tinsert at cycle " << curCycle << " ";
SU->getInstr()->dump();
});
@@ -3529,7 +3639,7 @@ bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
FirstCycle = curCycle;
return true;
}
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "\tfailed to insert at cycle " << curCycle << " ";
SU->getInstr()->dump();
});
@@ -3553,7 +3663,7 @@ int SMSchedule::earliestCycleInChain(const SDep &Dep) {
continue;
EarlyCycle = std::min(EarlyCycle, it->second);
for (const auto &PI : PrevSU->Preds)
- if (SwingSchedulerDAG::isOrder(PrevSU, PI))
+ if (PI.getKind() == SDep::Order || Dep.getKind() == SDep::Output)
Worklist.push_back(PI);
Visited.insert(PrevSU);
}
@@ -3576,7 +3686,7 @@ int SMSchedule::latestCycleInChain(const SDep &Dep) {
continue;
LateCycle = std::max(LateCycle, it->second);
for (const auto &SI : SuccSU->Succs)
- if (SwingSchedulerDAG::isOrder(SuccSU, SI))
+ if (SI.getKind() == SDep::Order || Dep.getKind() == SDep::Output)
Worklist.push_back(SI);
Visited.insert(SuccSU);
}
@@ -3590,7 +3700,7 @@ static SUnit *multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG) {
for (auto &P : SU->Preds)
if (DAG->isBackedge(SU, P) && P.getSUnit()->getInstr()->isPHI())
for (auto &S : P.getSUnit()->Succs)
- if (S.getKind() == SDep::Order && S.getSUnit()->getInstr()->isPHI())
+ if (S.getKind() == SDep::Data && S.getSUnit()->getInstr()->isPHI())
return P.getSUnit();
return nullptr;
}
@@ -3601,7 +3711,7 @@ void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
int *MinEnd, int *MaxStart, int II,
SwingSchedulerDAG *DAG) {
// Iterate over each instruction that has been scheduled already. The start
- // slot computuation depends on whether the previously scheduled instruction
+ // slot computation depends on whether the previously scheduled instruction
// is a predecessor or successor of the specified instruction.
for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) {
@@ -3613,15 +3723,15 @@ void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
const SDep &Dep = SU->Preds[i];
if (Dep.getSUnit() == I) {
if (!DAG->isBackedge(SU, Dep)) {
- int EarlyStart = cycle + DAG->getLatency(SU, Dep) -
+ int EarlyStart = cycle + Dep.getLatency() -
DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
*MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
- if (DAG->isLoopCarriedOrder(SU, Dep, false)) {
+ if (DAG->isLoopCarriedDep(SU, Dep, false)) {
int End = earliestCycleInChain(Dep) + (II - 1);
*MinEnd = std::min(*MinEnd, End);
}
} else {
- int LateStart = cycle - DAG->getLatency(SU, Dep) +
+ int LateStart = cycle - Dep.getLatency() +
DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
*MinLateStart = std::min(*MinLateStart, LateStart);
}
@@ -3633,23 +3743,24 @@ void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
!SU->isPred(I))
*MinLateStart = std::min(*MinLateStart, cycle);
}
- for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; ++i)
+ for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; ++i) {
if (SU->Succs[i].getSUnit() == I) {
const SDep &Dep = SU->Succs[i];
if (!DAG->isBackedge(SU, Dep)) {
- int LateStart = cycle - DAG->getLatency(SU, Dep) +
+ int LateStart = cycle - Dep.getLatency() +
DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
*MinLateStart = std::min(*MinLateStart, LateStart);
- if (DAG->isLoopCarriedOrder(SU, Dep)) {
+ if (DAG->isLoopCarriedDep(SU, Dep)) {
int Start = latestCycleInChain(Dep) + 1 - II;
*MaxStart = std::max(*MaxStart, Start);
}
} else {
- int EarlyStart = cycle + DAG->getLatency(SU, Dep) -
+ int EarlyStart = cycle + Dep.getLatency() -
DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
*MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
}
}
+ }
}
}
}
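
As a worked illustration of the start-window arithmetic above (numbers made up): with II = 4, a previously scheduled instruction at cycle 7, an edge latency of 2, and a loop-carried distance of 1, a forward edge gives EarlyStart = 7 + 2 - 1 * 4 = 5, while the back-edge direction with the same numbers gives LateStart = 7 - 2 + 1 * 4 = 9.
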
@@ -3657,7 +3768,7 @@ void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
/// Order the instructions within a cycle so that the definitions occur
/// before the uses. Returns true if the instruction is added to the start
/// of the list, or false if added to the end.
-bool SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
+void SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
std::deque<SUnit *> &Insts) {
MachineInstr *MI = SU->getInstr();
bool OrderBeforeUse = false;
@@ -3670,13 +3781,11 @@ bool SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
unsigned Pos = 0;
for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
++I, ++Pos) {
- // Relative order of Phis does not matter.
- if (MI->isPHI() && (*I)->getInstr()->isPHI())
- continue;
for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
continue;
+
unsigned Reg = MO.getReg();
unsigned BasePos, OffsetPos;
if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
@@ -3688,7 +3797,8 @@ bool SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
(*I)->getInstr()->readsWritesVirtualRegister(Reg);
if (MO.isDef() && Reads && stageScheduled(*I) <= StageInst1) {
OrderBeforeUse = true;
- MoveUse = Pos;
+ if (MoveUse == 0)
+ MoveUse = Pos;
} else if (MO.isDef() && Reads && stageScheduled(*I) > StageInst1) {
// Add the instruction after the scheduled instruction.
OrderAfterDef = true;
@@ -3696,14 +3806,16 @@ bool SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
} else if (MO.isUse() && Writes && stageScheduled(*I) == StageInst1) {
if (cycleScheduled(*I) == cycleScheduled(SU) && !(*I)->isSucc(SU)) {
OrderBeforeUse = true;
- MoveUse = Pos;
+ if (MoveUse == 0)
+ MoveUse = Pos;
} else {
OrderAfterDef = true;
MoveDef = Pos;
}
} else if (MO.isUse() && Writes && stageScheduled(*I) > StageInst1) {
OrderBeforeUse = true;
- MoveUse = Pos;
+ if (MoveUse == 0)
+ MoveUse = Pos;
if (MoveUse != 0) {
OrderAfterDef = true;
MoveDef = Pos - 1;
@@ -3711,49 +3823,35 @@ bool SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
} else if (MO.isUse() && Writes && stageScheduled(*I) < StageInst1) {
// Add the instruction before the scheduled instruction.
OrderBeforeUse = true;
- MoveUse = Pos;
+ if (MoveUse == 0)
+ MoveUse = Pos;
} else if (MO.isUse() && stageScheduled(*I) == StageInst1 &&
isLoopCarriedDefOfUse(SSD, (*I)->getInstr(), MO)) {
- OrderBeforeDef = true;
- MoveUse = Pos;
+ if (MoveUse == 0) {
+ OrderBeforeDef = true;
+ MoveUse = Pos;
+ }
}
}
// Check for order dependences between instructions. Make sure the source
// is ordered before the destination.
- for (auto &S : SU->Succs)
- if (S.getKind() == SDep::Order) {
- if (S.getSUnit() == *I && stageScheduled(*I) == StageInst1) {
- OrderBeforeUse = true;
- MoveUse = Pos;
- }
- } else if (TargetRegisterInfo::isPhysicalRegister(S.getReg())) {
- if (cycleScheduled(SU) != cycleScheduled(S.getSUnit())) {
- if (S.isAssignedRegDep()) {
- OrderAfterDef = true;
- MoveDef = Pos;
- }
- } else {
- OrderBeforeUse = true;
+ for (auto &S : SU->Succs) {
+ if (S.getSUnit() != *I)
+ continue;
+ if (S.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
+ OrderBeforeUse = true;
+ if (Pos < MoveUse)
MoveUse = Pos;
- }
}
- for (auto &P : SU->Preds)
- if (P.getKind() == SDep::Order) {
- if (P.getSUnit() == *I && stageScheduled(*I) == StageInst1) {
- OrderAfterDef = true;
- MoveDef = Pos;
- }
- } else if (TargetRegisterInfo::isPhysicalRegister(P.getReg())) {
- if (cycleScheduled(SU) != cycleScheduled(P.getSUnit())) {
- if (P.isAssignedRegDep()) {
- OrderBeforeUse = true;
- MoveUse = Pos;
- }
- } else {
- OrderAfterDef = true;
- MoveDef = Pos;
- }
+ }
+ for (auto &P : SU->Preds) {
+ if (P.getSUnit() != *I)
+ continue;
+ if (P.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
+ OrderAfterDef = true;
+ MoveDef = Pos;
}
+ }
}
// A circular dependence.
@@ -3777,16 +3875,10 @@ bool SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
Insts.erase(Insts.begin() + MoveDef);
Insts.erase(Insts.begin() + MoveUse);
}
- if (orderDependence(SSD, UseSU, Insts)) {
- Insts.push_front(SU);
- orderDependence(SSD, DefSU, Insts);
- return true;
- }
- Insts.pop_back();
- Insts.push_back(SU);
- Insts.push_back(UseSU);
+ orderDependence(SSD, UseSU, Insts);
+ orderDependence(SSD, SU, Insts);
orderDependence(SSD, DefSU, Insts);
- return false;
+ return;
}
// Put the new instruction first if there is a use in the list. Otherwise,
// put it at the end of the list.
@@ -3794,14 +3886,13 @@ bool SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
Insts.push_front(SU);
else
Insts.push_back(SU);
- return OrderBeforeUse;
}
/// Return true if the scheduled Phi has a loop carried operand.
bool SMSchedule::isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi) {
if (!Phi.isPHI())
return false;
- assert(Phi.isPHI() && "Expecing a Phi.");
+ assert(Phi.isPHI() && "Expecting a Phi.");
SUnit *DefSU = SSD->getSUnit(&Phi);
unsigned DefCycle = cycleScheduled(DefSU);
int DefStage = stageScheduled(DefSU);
@@ -3868,6 +3959,100 @@ bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) {
return true;
}
+/// A property of the node order in swing-modulo-scheduling is
+/// that for nodes outside circuits the following holds:
+/// none of them is scheduled after both a successor and a
+/// predecessor.
+/// The method below checks whether the property is met.
+/// If not, debug information is printed and statistics information updated.
+/// Note that we do not use an assert statement.
+/// The reason is that although an invalid node oder may prevent
+/// the pipeliner from finding a pipelined schedule for arbitrary II,
+/// it does not lead to the generation of incorrect code.
+void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
+
+ // a sorted vector that maps each SUnit to its index in the NodeOrder
+ typedef std::pair<SUnit *, unsigned> UnitIndex;
+ std::vector<UnitIndex> Indices(NodeOrder.size(), std::make_pair(nullptr, 0));
+
+ for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i)
+ Indices.push_back(std::make_pair(NodeOrder[i], i));
+
+ auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
+ return std::get<0>(i1) < std::get<0>(i2);
+ };
+
+ // sort, so that we can perform a binary search
+ llvm::sort(Indices.begin(), Indices.end(), CompareKey);
+
+ bool Valid = true;
+ (void)Valid;
+ // for each SUnit in the NodeOrder, check whether
+ // it appears after both a successor and a predecessor
+ // of the SUnit. If this is the case, and the SUnit
+ // is not part of a circuit, then the NodeOrder is not
+ // valid.
+ for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i) {
+ SUnit *SU = NodeOrder[i];
+ unsigned Index = i;
+
+ bool PredBefore = false;
+ bool SuccBefore = false;
+
+ SUnit *Succ;
+ SUnit *Pred;
+ (void)Succ;
+ (void)Pred;
+
+ for (SDep &PredEdge : SU->Preds) {
+ SUnit *PredSU = PredEdge.getSUnit();
+ unsigned PredIndex =
+ std::get<1>(*std::lower_bound(Indices.begin(), Indices.end(),
+ std::make_pair(PredSU, 0), CompareKey));
+ if (!PredSU->getInstr()->isPHI() && PredIndex < Index) {
+ PredBefore = true;
+ Pred = PredSU;
+ break;
+ }
+ }
+
+ for (SDep &SuccEdge : SU->Succs) {
+ SUnit *SuccSU = SuccEdge.getSUnit();
+ unsigned SuccIndex =
+ std::get<1>(*std::lower_bound(Indices.begin(), Indices.end(),
+ std::make_pair(SuccSU, 0), CompareKey));
+ if (!SuccSU->getInstr()->isPHI() && SuccIndex < Index) {
+ SuccBefore = true;
+ Succ = SuccSU;
+ break;
+ }
+ }
+
+ if (PredBefore && SuccBefore && !SU->getInstr()->isPHI()) {
+ // instructions in circuits are allowed to be scheduled
+ // after both a successor and predecessor.
+ bool InCircuit = std::any_of(
+ Circuits.begin(), Circuits.end(),
+ [SU](const NodeSet &Circuit) { return Circuit.count(SU); });
+ if (InCircuit)
+ LLVM_DEBUG(dbgs() << "In a circuit, predecessor ";);
+ else {
+ Valid = false;
+ NumNodeOrderIssues++;
+ LLVM_DEBUG(dbgs() << "Predecessor ";);
+ }
+ LLVM_DEBUG(dbgs() << Pred->NodeNum << " and successor " << Succ->NodeNum
+ << " are scheduled before node " << SU->NodeNum
+ << "\n";);
+ }
+ }
+
+ LLVM_DEBUG({
+ if (!Valid)
+ dbgs() << "Invalid node order found!\n";
+ });
+}
+
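
The property, restated as a tiny standalone predicate over integer node ids (the names and the adjacency-list shape are illustrative, and the circuit exemption handled above is left out).

#include <cstddef>
#include <unordered_map>
#include <vector>

// Nodes are 0..N-1; Preds[n] and Succs[n] list n's dependence neighbours.
static bool hasValidNodeOrder(const std::vector<int> &Order,
                              const std::vector<std::vector<int>> &Preds,
                              const std::vector<std::vector<int>> &Succs) {
  std::unordered_map<int, std::size_t> Index;
  for (std::size_t i = 0; i < Order.size(); ++i)
    Index[Order[i]] = i;
  for (std::size_t i = 0; i < Order.size(); ++i) {
    int N = Order[i];
    bool PredBefore = false, SuccBefore = false;
    for (int P : Preds[N])
      PredBefore |= Index[P] < i;
    for (int S : Succs[N])
      SuccBefore |= Index[S] < i;
    if (PredBefore && SuccBefore)
      return false; // tolerated in the pass only for nodes inside a circuit
  }
  return true;
}
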
/// Attempt to fix the degenerate cases when the instruction serialization
/// causes the register lifetimes to overlap. For example,
/// p' = store_pi(p, b)
@@ -3987,27 +4172,25 @@ void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
// generated code.
for (int Cycle = getFirstCycle(), E = getFinalCycle(); Cycle <= E; ++Cycle) {
std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle];
- std::deque<SUnit *> newOrderZC;
- // Put the zero-cost, pseudo instructions at the start of the cycle.
+ std::deque<SUnit *> newOrderPhi;
for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
SUnit *SU = cycleInstrs[i];
- if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()))
- orderDependence(SSD, SU, newOrderZC);
+ if (SU->getInstr()->isPHI())
+ newOrderPhi.push_back(SU);
}
std::deque<SUnit *> newOrderI;
- // Then, add the regular instructions back.
for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
SUnit *SU = cycleInstrs[i];
- if (!ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()))
+ if (!SU->getInstr()->isPHI())
orderDependence(SSD, SU, newOrderI);
}
// Replace the old order with the new order.
- cycleInstrs.swap(newOrderZC);
+ cycleInstrs.swap(newOrderPhi);
cycleInstrs.insert(cycleInstrs.end(), newOrderI.begin(), newOrderI.end());
SSD->fixupRegisterOverlaps(cycleInstrs);
}
- DEBUG(dump(););
+ LLVM_DEBUG(dump(););
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp
index 1e74104e89ed..2619d8f78276 100644
--- a/contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp
@@ -11,6 +11,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/RegionInfoImpl.h"
#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/Pass.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
@@ -89,7 +90,7 @@ bool MachineRegionInfoPass::runOnMachineFunction(MachineFunction &F) {
RI.recalculate(F, DT, PDT, DF);
- DEBUG(RI.dump());
+ LLVM_DEBUG(RI.dump());
return false;
}
diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index b82ab02a6e6c..6095bdd06b69 100644
--- a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
@@ -65,23 +66,66 @@ void MachineRegisterInfo::setRegBank(unsigned Reg,
VRegInfo[Reg].first = &RegBank;
}
-const TargetRegisterClass *
-MachineRegisterInfo::constrainRegClass(unsigned Reg,
- const TargetRegisterClass *RC,
- unsigned MinNumRegs) {
- const TargetRegisterClass *OldRC = getRegClass(Reg);
+static const TargetRegisterClass *
+constrainRegClass(MachineRegisterInfo &MRI, unsigned Reg,
+ const TargetRegisterClass *OldRC,
+ const TargetRegisterClass *RC, unsigned MinNumRegs) {
if (OldRC == RC)
return RC;
const TargetRegisterClass *NewRC =
- getTargetRegisterInfo()->getCommonSubClass(OldRC, RC);
+ MRI.getTargetRegisterInfo()->getCommonSubClass(OldRC, RC);
if (!NewRC || NewRC == OldRC)
return NewRC;
if (NewRC->getNumRegs() < MinNumRegs)
return nullptr;
- setRegClass(Reg, NewRC);
+ MRI.setRegClass(Reg, NewRC);
return NewRC;
}
+const TargetRegisterClass *
+MachineRegisterInfo::constrainRegClass(unsigned Reg,
+ const TargetRegisterClass *RC,
+ unsigned MinNumRegs) {
+ return ::constrainRegClass(*this, Reg, getRegClass(Reg), RC, MinNumRegs);
+}
+
+bool
+MachineRegisterInfo::constrainRegAttrs(unsigned Reg,
+ unsigned ConstrainingReg,
+ unsigned MinNumRegs) {
+ auto const *OldRC = getRegClassOrNull(Reg);
+ auto const *RC = getRegClassOrNull(ConstrainingReg);
+ // A virtual register at any point must have either a low-level type
+ // or a class assigned, but not both. The only exception is the internals of
+ // GlobalISel's instruction selection pass, which is allowed to temporarily
+ // introduce registers with both types and classes.
+ assert((OldRC || getType(Reg).isValid()) && "Reg has neither class nor type");
+ assert((!OldRC || !getType(Reg).isValid()) && "Reg has class and type both");
+ assert((RC || getType(ConstrainingReg).isValid()) &&
+ "ConstrainingReg has neither class nor type");
+ assert((!RC || !getType(ConstrainingReg).isValid()) &&
+ "ConstrainingReg has class and type both");
+ if (OldRC && RC)
+ return ::constrainRegClass(*this, Reg, OldRC, RC, MinNumRegs);
+ // If one of the virtual registers is generic (used in generic machine
+ // instructions, has a low-level type, doesn't have a class), and the other is
+ // concrete (used in target specific instructions, doesn't have a low-level
+ // type, has a class), we can not unify them.
+ if (OldRC || RC)
+ return false;
+ // At this point, both registers are guaranteed to have a valid low-level
+ // type, and they must agree.
+ if (getType(Reg) != getType(ConstrainingReg))
+ return false;
+ auto const *OldRB = getRegBankOrNull(Reg);
+ auto const *RB = getRegBankOrNull(ConstrainingReg);
+ if (OldRB)
+ return !RB || RB == OldRB;
+ if (RB)
+ setRegBank(Reg, *RB);
+ return true;
+}
+
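
A hypothetical caller sketch for the new constrainRegAttrs (the helper name and the surrounding transform are assumptions, not part of this change): the typical use is to make sure one virtual register can absorb another's constraints before folding the two together.

#include "llvm/CodeGen/MachineRegisterInfo.h"

// Assumed usage pattern only: merge SrcReg into DstReg when their register
// class, low-level type, and register-bank constraints are compatible.
static bool tryMergeVRegs(llvm::MachineRegisterInfo &MRI, unsigned DstReg,
                          unsigned SrcReg) {
  if (!MRI.constrainRegAttrs(DstReg, SrcReg))
    return false;                     // incompatible class, LLT, or bank
  MRI.replaceRegWith(SrcReg, DstReg); // all uses of SrcReg now use DstReg
  return true;
}
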
bool
MachineRegisterInfo::recomputeRegClass(unsigned Reg) {
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
@@ -107,10 +151,11 @@ MachineRegisterInfo::recomputeRegClass(unsigned Reg) {
return true;
}
-unsigned MachineRegisterInfo::createIncompleteVirtualRegister() {
+unsigned MachineRegisterInfo::createIncompleteVirtualRegister(StringRef Name) {
unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs());
VRegInfo.grow(Reg);
RegAllocHints.grow(Reg);
+ insertVRegByName(Name, Reg);
return Reg;
}
@@ -118,47 +163,42 @@ unsigned MachineRegisterInfo::createIncompleteVirtualRegister() {
/// function with the specified register class.
///
unsigned
-MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
+MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass,
+ StringRef Name) {
assert(RegClass && "Cannot create register without RegClass!");
assert(RegClass->isAllocatable() &&
"Virtual register RegClass must be allocatable.");
// New virtual register number.
- unsigned Reg = createIncompleteVirtualRegister();
+ unsigned Reg = createIncompleteVirtualRegister(Name);
VRegInfo[Reg].first = RegClass;
if (TheDelegate)
TheDelegate->MRI_NoteNewVirtualRegister(Reg);
return Reg;
}
-LLT MachineRegisterInfo::getType(unsigned VReg) const {
- VRegToTypeMap::const_iterator TypeIt = getVRegToType().find(VReg);
- return TypeIt != getVRegToType().end() ? TypeIt->second : LLT{};
-}
-
void MachineRegisterInfo::setType(unsigned VReg, LLT Ty) {
// Check that VReg doesn't have a class.
assert((getRegClassOrRegBank(VReg).isNull() ||
!getRegClassOrRegBank(VReg).is<const TargetRegisterClass *>()) &&
"Can't set the size of a non-generic virtual register");
- getVRegToType()[VReg] = Ty;
+ VRegToType.grow(VReg);
+ VRegToType[VReg] = Ty;
}
unsigned
-MachineRegisterInfo::createGenericVirtualRegister(LLT Ty) {
+MachineRegisterInfo::createGenericVirtualRegister(LLT Ty, StringRef Name) {
// New virtual register number.
- unsigned Reg = createIncompleteVirtualRegister();
+ unsigned Reg = createIncompleteVirtualRegister(Name);
// FIXME: Should we use a dummy register class?
VRegInfo[Reg].first = static_cast<RegisterBank *>(nullptr);
- getVRegToType()[Reg] = Ty;
+ setType(Reg, Ty);
if (TheDelegate)
TheDelegate->MRI_NoteNewVirtualRegister(Reg);
return Reg;
}
-void MachineRegisterInfo::clearVirtRegTypes() {
- getVRegToType().clear();
-}
+void MachineRegisterInfo::clearVirtRegTypes() { VRegToType.clear(); }
/// clearVirtRegs - Remove all virtual registers (after physreg assignment).
void MachineRegisterInfo::clearVirtRegs() {
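The StringRef parameter added above threads an optional name through createIncompleteVirtualRegister and both public create* entry points, so a vreg can be given a human-readable name at creation time. A hedged usage sketch follows; the helper function and the names are illustrative, while the two overloads are the ones changed in this hunk.

#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/LowLevelTypeImpl.h"

// Illustrative only: named vregs print as %ptr / %val in MIR dumps instead of
// bare numbers, which helps when reading -print-after-all output.
void buildNamedTemps(llvm::MachineRegisterInfo &MRI,
                     const llvm::TargetRegisterClass &AddrRC) {
  unsigned Ptr = MRI.createVirtualRegister(&AddrRC, "ptr");
  unsigned Val = MRI.createGenericVirtualRegister(llvm::LLT::scalar(64), "val");
  (void)Ptr;
  (void)Val;
}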
diff --git a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp
index 36844e9fb30a..773661965f18 100644
--- a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp
@@ -204,7 +204,7 @@ unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
// If the client wants to know about all new instructions, tell it.
if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
- DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n");
+ LLVM_DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n");
return InsertedPHI->getOperand(0).getReg();
}
diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
index e15eb658a05c..502d18f08f93 100644
--- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -32,7 +32,6 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
@@ -48,6 +47,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -55,6 +55,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -271,7 +272,7 @@ priorNonDebug(MachineBasicBlock::const_iterator I,
MachineBasicBlock::const_iterator Beg) {
assert(I != Beg && "reached the top of the region, cannot decrement");
while (--I != Beg) {
- if (!I->isDebugValue())
+ if (!I->isDebugInstr())
break;
}
return I;
@@ -291,7 +292,7 @@ static MachineBasicBlock::const_iterator
nextIfDebug(MachineBasicBlock::const_iterator I,
MachineBasicBlock::const_iterator End) {
for(; I != End; ++I) {
- if (!I->isDebugValue())
+ if (!I->isDebugInstr())
break;
}
return I;
@@ -344,7 +345,7 @@ ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
/// This design avoids exposing scheduling boundaries to the DAG builder,
/// simplifying the DAG builder's support for "special" target instructions.
/// At the same time the design allows target schedulers to operate across
-/// scheduling boundaries, for example to bundle the boudary instructions
+/// scheduling boundaries, for example to bundle the boundary instructions
/// without reordering them. This creates complexity, because the target
/// scheduler must update the RegionBegin and RegionEnd positions cached by
/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
@@ -360,7 +361,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
} else if (!mf.getSubtarget().enableMachineScheduler())
return false;
- DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
+ LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
// Initialize the context of the pass.
MF = &mf;
@@ -372,7 +373,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
LIS = &getAnalysis<LiveIntervals>();
if (VerifyScheduling) {
- DEBUG(LIS->dump());
+ LLVM_DEBUG(LIS->dump());
MF->verify(this, "Before machine scheduling.");
}
RegClassInfo->runOnMachineFunction(*MF);
@@ -382,7 +383,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
scheduleRegions(*Scheduler, false);
- DEBUG(LIS->dump());
+ LLVM_DEBUG(LIS->dump());
if (VerifyScheduling)
MF->verify(this, "After machine scheduling.");
return true;
@@ -396,10 +397,10 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
if (!EnablePostRAMachineSched)
return false;
} else if (!mf.getSubtarget().enablePostRAScheduler()) {
- DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
+ LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
return false;
}
- DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
+ LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
// Initialize the context of the pass.
MF = &mf;
@@ -481,7 +482,7 @@ getSchedRegions(MachineBasicBlock *MBB,
MachineInstr &MI = *std::prev(I);
if (isSchedBoundary(&MI, &*MBB, MF, TII))
break;
- if (!MI.isDebugValue())
+ if (!MI.isDebugInstr())
// MBB::size() uses instr_iterator to count. Here we need a bundle to
// count as a single instruction.
++NumRegionInstrs;
@@ -547,12 +548,13 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
Scheduler.exitRegion();
continue;
}
- DEBUG(dbgs() << "********** MI Scheduling **********\n");
- DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB) << " "
- << MBB->getName() << "\n From: " << *I << " To: ";
- if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
- else dbgs() << "End";
- dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
+ LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
+ LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
+ << " " << MBB->getName() << "\n From: " << *I
+ << " To: ";
+ if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
+ else dbgs() << "End";
+ dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
if (DumpCriticalPathLength) {
errs() << MF->getName();
errs() << ":%bb. " << MBB->getNumber();
@@ -749,8 +751,8 @@ bool ScheduleDAGMI::checkSchedLimit() {
/// does not consider liveness or register pressure. It is useful for PostRA
/// scheduling and potentially other custom schedulers.
void ScheduleDAGMI::schedule() {
- DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
- DEBUG(SchedImpl->dumpPolicy());
+ LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
+ LLVM_DEBUG(SchedImpl->dumpPolicy());
// Build the DAG.
buildSchedGraph(AA);
@@ -762,26 +764,22 @@ void ScheduleDAGMI::schedule() {
SmallVector<SUnit*, 8> TopRoots, BotRoots;
findRootsAndBiasEdges(TopRoots, BotRoots);
+ LLVM_DEBUG(if (EntrySU.getInstr() != nullptr) EntrySU.dumpAll(this);
+ for (const SUnit &SU
+ : SUnits) SU.dumpAll(this);
+ if (ExitSU.getInstr() != nullptr) ExitSU.dumpAll(this););
+ if (ViewMISchedDAGs) viewGraph();
+
// Initialize the strategy before modifying the DAG.
// This may initialize a DFSResult to be used for queue priority.
SchedImpl->initialize(this);
- DEBUG(
- if (EntrySU.getInstr() != nullptr)
- EntrySU.dumpAll(this);
- for (const SUnit &SU : SUnits)
- SU.dumpAll(this);
- if (ExitSU.getInstr() != nullptr)
- ExitSU.dumpAll(this);
- );
- if (ViewMISchedDAGs) viewGraph();
-
// Initialize ready queues now that the DAG and priority data are finalized.
initQueues(TopRoots, BotRoots);
bool IsTopNode = false;
while (true) {
- DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
+ LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
SUnit *SU = SchedImpl->pickNode(IsTopNode);
if (!SU) break;
@@ -821,7 +819,7 @@ void ScheduleDAGMI::schedule() {
placeDebugValues();
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "*** Final schedule for "
<< printMBBReference(*begin()->getParent()) << " ***\n";
dumpSchedule();
@@ -1016,7 +1014,7 @@ void ScheduleDAGMILive::initRegPressure() {
// Close the RPTracker to finalize live ins.
RPTracker.closeRegion();
- DEBUG(RPTracker.dump());
+ LLVM_DEBUG(RPTracker.dump());
// Initialize the live ins and live outs.
TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
@@ -1031,8 +1029,8 @@ void ScheduleDAGMILive::initRegPressure() {
BotRPTracker.initLiveThru(RPTracker);
if (!BotRPTracker.getLiveThru().empty()) {
TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
- DEBUG(dbgs() << "Live Thru: ";
- dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
+ LLVM_DEBUG(dbgs() << "Live Thru: ";
+ dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
};
// For each live out vreg reduce the pressure change associated with other
@@ -1046,15 +1044,13 @@ void ScheduleDAGMILive::initRegPressure() {
updatePressureDiffs(LiveUses);
}
- DEBUG(
- dbgs() << "Top Pressure:\n";
- dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
- dbgs() << "Bottom Pressure:\n";
- dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI);
- );
+ LLVM_DEBUG(dbgs() << "Top Pressure:\n";
+ dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
+ dbgs() << "Bottom Pressure:\n";
+ dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););
assert((BotRPTracker.getPos() == RegionEnd ||
- (RegionEnd->isDebugValue() &&
+ (RegionEnd->isDebugInstr() &&
BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
"Can't find the region bottom");
@@ -1066,17 +1062,16 @@ void ScheduleDAGMILive::initRegPressure() {
for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
if (RegionPressure[i] > Limit) {
- DEBUG(dbgs() << TRI->getRegPressureSetName(i)
- << " Limit " << Limit
- << " Actual " << RegionPressure[i] << "\n");
+ LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
+ << " Actual " << RegionPressure[i] << "\n");
RegionCriticalPSets.push_back(PressureChange(i));
}
}
- DEBUG(dbgs() << "Excess PSets: ";
- for (const PressureChange &RCPS : RegionCriticalPSets)
- dbgs() << TRI->getRegPressureSetName(
- RCPS.getPSet()) << " ";
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Excess PSets: ";
+ for (const PressureChange &RCPS
+ : RegionCriticalPSets) dbgs()
+ << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
+ dbgs() << "\n");
}
void ScheduleDAGMILive::
@@ -1097,10 +1092,11 @@ updateScheduledPressure(const SUnit *SU,
}
unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
if (NewMaxPressure[ID] >= Limit - 2) {
- DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
- << NewMaxPressure[ID]
- << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ") << Limit
- << "(+ " << BotRPTracker.getLiveThru()[ID] << " livethru)\n");
+ LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
+ << NewMaxPressure[ID]
+ << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
+ << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
+ << " livethru)\n");
}
}
}
@@ -1130,17 +1126,14 @@ void ScheduleDAGMILive::updatePressureDiffs(
PressureDiff &PDiff = getPressureDiff(&SU);
PDiff.addPressureChange(Reg, Decrement, &MRI);
- DEBUG(
- dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
- << printReg(Reg, TRI) << ':' << PrintLaneMask(P.LaneMask)
- << ' ' << *SU.getInstr();
- dbgs() << " to ";
- PDiff.dump(*TRI);
- );
+ LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
+ << printReg(Reg, TRI) << ':'
+ << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
+ dbgs() << " to "; PDiff.dump(*TRI););
}
} else {
assert(P.LaneMask.any());
- DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
+ LLVM_DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
// This may be called before CurrentBottom has been initialized. However,
// BotRPTracker must have a valid position. We want the value live into the
// instruction or live out of the block, so ask for the previous
@@ -1168,12 +1161,9 @@ void ScheduleDAGMILive::updatePressureDiffs(
if (LRQ.valueIn() == VNI) {
PressureDiff &PDiff = getPressureDiff(SU);
PDiff.addPressureChange(Reg, true, &MRI);
- DEBUG(
- dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
- << *SU->getInstr();
- dbgs() << " to ";
- PDiff.dump(*TRI);
- );
+ LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
+ << *SU->getInstr();
+ dbgs() << " to "; PDiff.dump(*TRI););
}
}
}
@@ -1192,8 +1182,8 @@ void ScheduleDAGMILive::updatePressureDiffs(
/// ScheduleDAGMILive then it will want to override this virtual method in order
/// to update any specialized state.
void ScheduleDAGMILive::schedule() {
- DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
- DEBUG(SchedImpl->dumpPolicy());
+ LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
+ LLVM_DEBUG(SchedImpl->dumpPolicy());
buildDAGWithRegPressure();
Topo.InitDAGTopologicalSorting();
@@ -1207,26 +1197,22 @@ void ScheduleDAGMILive::schedule() {
// This may initialize a DFSResult to be used for queue priority.
SchedImpl->initialize(this);
- DEBUG(
- if (EntrySU.getInstr() != nullptr)
- EntrySU.dumpAll(this);
- for (const SUnit &SU : SUnits) {
- SU.dumpAll(this);
- if (ShouldTrackPressure) {
- dbgs() << " Pressure Diff : ";
- getPressureDiff(&SU).dump(*TRI);
- }
- dbgs() << " Single Issue : ";
- if (SchedModel.mustBeginGroup(SU.getInstr()) &&
- SchedModel.mustEndGroup(SU.getInstr()))
- dbgs() << "true;";
- else
- dbgs() << "false;";
- dbgs() << '\n';
- }
- if (ExitSU.getInstr() != nullptr)
- ExitSU.dumpAll(this);
- );
+ LLVM_DEBUG(if (EntrySU.getInstr() != nullptr) EntrySU.dumpAll(this);
+ for (const SUnit &SU
+ : SUnits) {
+ SU.dumpAll(this);
+ if (ShouldTrackPressure) {
+ dbgs() << " Pressure Diff : ";
+ getPressureDiff(&SU).dump(*TRI);
+ }
+ dbgs() << " Single Issue : ";
+ if (SchedModel.mustBeginGroup(SU.getInstr()) &&
+ SchedModel.mustEndGroup(SU.getInstr()))
+ dbgs() << "true;";
+ else
+ dbgs() << "false;";
+ dbgs() << '\n';
+ } if (ExitSU.getInstr() != nullptr) ExitSU.dumpAll(this););
if (ViewMISchedDAGs) viewGraph();
// Initialize ready queues now that the DAG and priority data are finalized.
@@ -1234,7 +1220,7 @@ void ScheduleDAGMILive::schedule() {
bool IsTopNode = false;
while (true) {
- DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
+ LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
SUnit *SU = SchedImpl->pickNode(IsTopNode);
if (!SU) break;
@@ -1262,7 +1248,7 @@ void ScheduleDAGMILive::schedule() {
placeDebugValues();
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "*** Final schedule for "
<< printMBBReference(*begin()->getParent()) << " ***\n";
dumpSchedule();
@@ -1379,13 +1365,13 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
} else
CyclicLatency = 0;
- DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
- << SU->NodeNum << ") = " << CyclicLatency << "c\n");
+ LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
+ << SU->NodeNum << ") = " << CyclicLatency << "c\n");
if (CyclicLatency > MaxCyclicLatency)
MaxCyclicLatency = CyclicLatency;
}
}
- DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
+ LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
return MaxCyclicLatency;
}
@@ -1429,10 +1415,8 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
TopRPTracker.advance(RegOpers);
assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
- DEBUG(
- dbgs() << "Top Pressure:\n";
- dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
- );
+ LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
+ TopRPTracker.getRegSetPressureAtPos(), TRI););
updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
}
@@ -1449,6 +1433,7 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
}
moveInstruction(MI, CurrentBottom);
CurrentBottom = MI;
+ BotRPTracker.setPos(CurrentBottom);
}
if (ShouldTrackPressure) {
RegisterOperands RegOpers;
@@ -1467,10 +1452,8 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
SmallVector<RegisterMaskPair, 8> LiveUses;
BotRPTracker.recede(RegOpers, &LiveUses);
assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
- DEBUG(
- dbgs() << "Bottom Pressure:\n";
- dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI);
- );
+ LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
+ BotRPTracker.getRegSetPressureAtPos(), TRI););
updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
updatePressureDiffs(LiveUses);
@@ -1484,7 +1467,7 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
namespace {
-/// \brief Post-process the DAG to create cluster edges between neighboring
+/// Post-process the DAG to create cluster edges between neighboring
/// loads or between neighboring stores.
class BaseMemOpClusterMutation : public ScheduleDAGMutation {
struct MemOpInfo {
@@ -1561,7 +1544,7 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
if (MemOpRecords.size() < 2)
return;
- std::sort(MemOpRecords.begin(), MemOpRecords.end());
+ llvm::sort(MemOpRecords.begin(), MemOpRecords.end());
unsigned ClusterLength = 1;
for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
SUnit *SUa = MemOpRecords[Idx].SU;
@@ -1570,8 +1553,8 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
*SUb->getInstr(), MemOpRecords[Idx+1].BaseReg,
ClusterLength) &&
DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
- DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
- << SUb->NodeNum << ")\n");
+ LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
+ << SUb->NodeNum << ")\n");
// Copy successor edges from SUa to SUb. Interleaving computation
// dependent on SUa can prevent load combining due to register reuse.
// Predecessor edges do not need to be copied from SUb to SUa since nearby
@@ -1579,7 +1562,8 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
for (const SDep &Succ : SUa->Succs) {
if (Succ.getSUnit() == SUb)
continue;
- DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum << ")\n");
+ LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
+ << ")\n");
DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
}
++ClusterLength;
@@ -1588,7 +1572,7 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
}
}
-/// \brief Callback from DAG postProcessing to create cluster edges for loads.
+/// Callback from DAG postProcessing to create cluster edges for loads.
void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
@@ -1629,7 +1613,7 @@ void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
namespace {
-/// \brief Post-process the DAG to create weak edges from all uses of a copy to
+/// Post-process the DAG to create weak edges from all uses of a copy to
/// the one use that defines the copy's source vreg, most likely an induction
/// variable increment.
class CopyConstrain : public ScheduleDAGMutation {
@@ -1724,7 +1708,7 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
// If GlobalSegment is killed at the LocalLI->start, the call to find()
// returned the next global segment. But if GlobalSegment overlaps with
- // LocalLI->start, then advance to the next segement. If a hole in GlobalLI
+ // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
// exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
if (GlobalSegment->contains(LocalLI->beginIndex()))
++GlobalSegment;
@@ -1788,23 +1772,23 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
return;
GlobalUses.push_back(Pred.getSUnit());
}
- DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
+ LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
// Add the weak edges.
for (SmallVectorImpl<SUnit*>::const_iterator
I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
- DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU("
- << GlobalSU->NodeNum << ")\n");
+ LLVM_DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU("
+ << GlobalSU->NodeNum << ")\n");
DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
}
for (SmallVectorImpl<SUnit*>::const_iterator
I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
- DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU("
- << FirstLocalSU->NodeNum << ")\n");
+ LLVM_DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU("
+ << FirstLocalSU->NodeNum << ")\n");
DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
}
}
-/// \brief Callback from DAG postProcessing to create weak edges to encourage
+/// Callback from DAG postProcessing to create weak edges to encourage
/// copy elimination.
void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
@@ -1941,7 +1925,7 @@ getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
/// The scheduler supports two modes of hazard recognition. The first is the
/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
/// supports highly complicated in-order reservation tables
-/// (ScoreboardHazardRecognizer) and arbitraty target-specific logic.
+/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
///
/// The second is a streamlined mechanism that checks for hazards based on
/// simple counters that the scheduler itself maintains. It explicitly checks
@@ -1957,16 +1941,16 @@ bool SchedBoundary::checkHazard(SUnit *SU) {
unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
- DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
- << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
+ LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
+ << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
return true;
}
if (CurrMOps > 0 &&
((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
(!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
- DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
- << (isTop()? "begin" : "end") << " group\n");
+ LLVM_DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
+ << (isTop() ? "begin" : "end") << " group\n");
return true;
}
@@ -1982,9 +1966,9 @@ bool SchedBoundary::checkHazard(SUnit *SU) {
#ifndef NDEBUG
MaxObservedStall = std::max(Cycles, MaxObservedStall);
#endif
- DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
- << SchedModel->getResourceName(ResIdx)
- << "=" << NRCycle << "c\n");
+ LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
+ << SchedModel->getResourceName(ResIdx) << "="
+ << NRCycle << "c\n");
return true;
}
}
@@ -2005,8 +1989,8 @@ findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
}
}
if (LateSU) {
- DEBUG(dbgs() << Available.getName() << " RemLatency SU("
- << LateSU->NodeNum << ") " << RemLatency << "c\n");
+ LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
+ << LateSU->NodeNum << ") " << RemLatency << "c\n");
}
return RemLatency;
}
@@ -2022,8 +2006,8 @@ getOtherResourceCount(unsigned &OtherCritIdx) {
unsigned OtherCritCount = Rem->RemIssueCount
+ (RetiredMOps * SchedModel->getMicroOpFactor());
- DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
- << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
+ LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
+ << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
PIdx != PEnd; ++PIdx) {
unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
@@ -2033,9 +2017,10 @@ getOtherResourceCount(unsigned &OtherCritIdx) {
}
}
if (OtherCritIdx) {
- DEBUG(dbgs() << " " << Available.getName() << " + Remain CritRes: "
- << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
- << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
+ LLVM_DEBUG(
+ dbgs() << " " << Available.getName() << " + Remain CritRes: "
+ << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
+ << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
}
return OtherCritCount;
}
@@ -2099,7 +2084,8 @@ void SchedBoundary::bumpCycle(unsigned NextCycle) {
checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
getScheduledLatency());
- DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n');
+ LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
+ << '\n');
}
void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
@@ -2119,8 +2105,8 @@ unsigned SchedBoundary::
countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
unsigned Factor = SchedModel->getResourceFactor(PIdx);
unsigned Count = Factor * Cycles;
- DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx)
- << " +" << Cycles << "x" << Factor << "u\n");
+ LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
+ << Cycles << "x" << Factor << "u\n");
// Update Executed resources counts.
incExecutedResources(PIdx, Count);
@@ -2131,16 +2117,17 @@ countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
// becomes the critical resource.
if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
ZoneCritResIdx = PIdx;
- DEBUG(dbgs() << " *** Critical resource "
- << SchedModel->getResourceName(PIdx) << ": "
- << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
+ LLVM_DEBUG(dbgs() << " *** Critical resource "
+ << SchedModel->getResourceName(PIdx) << ": "
+ << getResourceCount(PIdx) / SchedModel->getLatencyFactor()
+ << "c\n");
}
// For reserved resources, record the highest cycle using the resource.
unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
if (NextAvailable > CurrCycle) {
- DEBUG(dbgs() << " Resource conflict: "
- << SchedModel->getProcResource(PIdx)->Name << " reserved until @"
- << NextAvailable << "\n");
+ LLVM_DEBUG(dbgs() << " Resource conflict: "
+ << SchedModel->getProcResource(PIdx)->Name
+ << " reserved until @" << NextAvailable << "\n");
}
return NextAvailable;
}
@@ -2165,7 +2152,7 @@ void SchedBoundary::bumpNode(SUnit *SU) {
"Cannot schedule this instruction's MicroOps in the current cycle.");
unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
- DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
+ LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
unsigned NextCycle = CurrCycle;
switch (SchedModel->getMicroOpBufferSize()) {
@@ -2175,7 +2162,7 @@ void SchedBoundary::bumpNode(SUnit *SU) {
case 1:
if (ReadyCycle > NextCycle) {
NextCycle = ReadyCycle;
- DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
+ LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
}
break;
default:
@@ -2204,8 +2191,9 @@ void SchedBoundary::bumpNode(SUnit *SU) {
if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
>= (int)SchedModel->getLatencyFactor()) {
ZoneCritResIdx = 0;
- DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
- << ScaledMOps / SchedModel->getLatencyFactor() << "c\n");
+ LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
+ << ScaledMOps / SchedModel->getLatencyFactor()
+ << "c\n");
}
}
for (TargetSchedModel::ProcResIter
@@ -2241,13 +2229,13 @@ void SchedBoundary::bumpNode(SUnit *SU) {
unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
if (SU->getDepth() > TopLatency) {
TopLatency = SU->getDepth();
- DEBUG(dbgs() << " " << Available.getName()
- << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n");
+ LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU("
+ << SU->NodeNum << ") " << TopLatency << "c\n");
}
if (SU->getHeight() > BotLatency) {
BotLatency = SU->getHeight();
- DEBUG(dbgs() << " " << Available.getName()
- << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n");
+ LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU("
+ << SU->NodeNum << ") " << BotLatency << "c\n");
}
// If we stall for any reason, bump the cycle.
if (NextCycle > CurrCycle)
@@ -2271,17 +2259,17 @@ void SchedBoundary::bumpNode(SUnit *SU) {
// currCycle to X.
if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
(!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
- DEBUG(dbgs() << " Bump cycle to "
- << (isTop() ? "end" : "begin") << " group\n");
+ LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin")
+ << " group\n");
bumpCycle(++NextCycle);
}
while (CurrMOps >= SchedModel->getIssueWidth()) {
- DEBUG(dbgs() << " *** Max MOps " << CurrMOps
- << " at cycle " << CurrCycle << '\n');
+ LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "
+ << CurrCycle << '\n');
bumpCycle(++NextCycle);
}
- DEBUG(dumpScheduledState());
+ LLVM_DEBUG(dumpScheduledState());
}
/// Release pending ready nodes in to the available queue. This makes them
@@ -2354,8 +2342,8 @@ SUnit *SchedBoundary::pickOnlyChoice() {
releasePending();
}
- DEBUG(Pending.dump());
- DEBUG(Available.dump());
+ LLVM_DEBUG(Pending.dump());
+ LLVM_DEBUG(Available.dump());
if (Available.size() == 1)
return *Available.begin();
@@ -2453,27 +2441,24 @@ void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
if (!OtherResLimited) {
if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) {
Policy.ReduceLatency |= true;
- DEBUG(dbgs() << " " << CurrZone.Available.getName()
- << " RemainingLatency " << RemLatency << " + "
- << CurrZone.getCurrCycle() << "c > CritPath "
- << Rem.CriticalPath << "\n");
+ LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
+ << " RemainingLatency " << RemLatency << " + "
+ << CurrZone.getCurrCycle() << "c > CritPath "
+ << Rem.CriticalPath << "\n");
}
}
// If the same resource is limiting inside and outside the zone, do nothing.
if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
return;
- DEBUG(
- if (CurrZone.isResourceLimited()) {
- dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
- << SchedModel->getResourceName(CurrZone.getZoneCritResIdx())
- << "\n";
- }
- if (OtherResLimited)
- dbgs() << " RemainingLimit: "
- << SchedModel->getResourceName(OtherCritIdx) << "\n";
- if (!CurrZone.isResourceLimited() && !OtherResLimited)
- dbgs() << " Latency limited both directions.\n");
+ LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
+ dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
+ << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
+ } if (OtherResLimited) dbgs()
+ << " RemainingLimit: "
+ << SchedModel->getResourceName(OtherCritIdx) << "\n";
+ if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
+ << " Latency limited both directions.\n");
if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
@@ -2560,11 +2545,12 @@ void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
}
#endif
+namespace llvm {
/// Return true if this heuristic determines order.
-static bool tryLess(int TryVal, int CandVal,
- GenericSchedulerBase::SchedCandidate &TryCand,
- GenericSchedulerBase::SchedCandidate &Cand,
- GenericSchedulerBase::CandReason Reason) {
+bool tryLess(int TryVal, int CandVal,
+ GenericSchedulerBase::SchedCandidate &TryCand,
+ GenericSchedulerBase::SchedCandidate &Cand,
+ GenericSchedulerBase::CandReason Reason) {
if (TryVal < CandVal) {
TryCand.Reason = Reason;
return true;
@@ -2577,10 +2563,10 @@ static bool tryLess(int TryVal, int CandVal,
return false;
}
-static bool tryGreater(int TryVal, int CandVal,
- GenericSchedulerBase::SchedCandidate &TryCand,
- GenericSchedulerBase::SchedCandidate &Cand,
- GenericSchedulerBase::CandReason Reason) {
+bool tryGreater(int TryVal, int CandVal,
+ GenericSchedulerBase::SchedCandidate &TryCand,
+ GenericSchedulerBase::SchedCandidate &Cand,
+ GenericSchedulerBase::CandReason Reason) {
if (TryVal > CandVal) {
TryCand.Reason = Reason;
return true;
@@ -2593,9 +2579,9 @@ static bool tryGreater(int TryVal, int CandVal,
return false;
}
-static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
- GenericSchedulerBase::SchedCandidate &Cand,
- SchedBoundary &Zone) {
+bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
+ GenericSchedulerBase::SchedCandidate &Cand,
+ SchedBoundary &Zone) {
if (Zone.isTop()) {
if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
@@ -2617,10 +2603,11 @@ static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
}
return false;
}
+} // end namespace llvm
static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
- DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
- << GenericSchedulerBase::getReasonStr(Reason) << '\n');
+ LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
+ << GenericSchedulerBase::getReasonStr(Reason) << '\n');
}
static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
@@ -2742,14 +2729,14 @@ void GenericScheduler::checkAcyclicLatency() {
Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
- DEBUG(dbgs() << "IssueCycles="
- << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
- << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
- << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount
- << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
- << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
- if (Rem.IsAcyclicLatencyLimited)
- dbgs() << " ACYCLIC LATENCY LIMIT\n");
+ LLVM_DEBUG(
+ dbgs() << "IssueCycles="
+ << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
+ << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
+ << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
+ << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
+ << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
+ if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n");
}
void GenericScheduler::registerRoots() {
@@ -2760,7 +2747,7 @@ void GenericScheduler::registerRoots() {
if (SU->getDepth() > Rem.CriticalPath)
Rem.CriticalPath = SU->getDepth();
}
- DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
+ LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
if (DumpCriticalPathLength) {
errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
}
@@ -2771,13 +2758,14 @@ void GenericScheduler::registerRoots() {
}
}
-static bool tryPressure(const PressureChange &TryP,
- const PressureChange &CandP,
- GenericSchedulerBase::SchedCandidate &TryCand,
- GenericSchedulerBase::SchedCandidate &Cand,
- GenericSchedulerBase::CandReason Reason,
- const TargetRegisterInfo *TRI,
- const MachineFunction &MF) {
+namespace llvm {
+bool tryPressure(const PressureChange &TryP,
+ const PressureChange &CandP,
+ GenericSchedulerBase::SchedCandidate &TryCand,
+ GenericSchedulerBase::SchedCandidate &Cand,
+ GenericSchedulerBase::CandReason Reason,
+ const TargetRegisterInfo *TRI,
+ const MachineFunction &MF) {
// If one candidate decreases and the other increases, go with it.
// Invalid candidates have UnitInc==0.
if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
@@ -2810,7 +2798,7 @@ static bool tryPressure(const PressureChange &TryP,
return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
}
-static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
+unsigned getWeakLeft(const SUnit *SU, bool isTop) {
return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
}
@@ -2821,7 +2809,7 @@ static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
/// with the operation that produces or consumes the physreg. We'll do this when
/// regalloc has support for parallel copies.
-static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
+int biasPhysRegCopy(const SUnit *SU, bool isTop) {
const MachineInstr *MI = SU->getInstr();
if (!MI->isCopy())
return 0;
@@ -2841,6 +2829,7 @@ static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
return AtBoundary ? -1 : 1;
return 0;
}
+} // end namespace llvm
void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
bool AtTop,
@@ -2873,13 +2862,13 @@ void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
}
}
}
- DEBUG(if (Cand.RPDelta.Excess.isValid())
- dbgs() << " Try SU(" << Cand.SU->NodeNum << ") "
- << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet())
- << ":" << Cand.RPDelta.Excess.getUnitInc() << "\n");
+ LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
+ << " Try SU(" << Cand.SU->NodeNum << ") "
+ << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
+ << Cand.RPDelta.Excess.getUnitInc() << "\n");
}
-/// Apply a set of heursitics to a new candidate. Heuristics are currently
+/// Apply a set of heuristics to a new candidate. Heuristics are currently
/// hierarchical. This may be more efficient than a graduated cost model because
/// we don't need to evaluate all aspects of the model for each node in the
/// queue. But it's really done to make the heuristics easier to debug and
@@ -2891,7 +2880,7 @@ void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
// if Cand is from a different zone than TryCand.
void GenericScheduler::tryCandidate(SchedCandidate &Cand,
SchedCandidate &TryCand,
- SchedBoundary *Zone) {
+ SchedBoundary *Zone) const {
// Initialize the candidate if needed.
if (!Cand.isValid()) {
TryCand.Reason = NodeOrder;
@@ -3017,7 +3006,7 @@ void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
if (TryCand.ResDelta == SchedResourceDelta())
TryCand.initResourceDelta(DAG, SchedModel);
Cand.setBest(TryCand);
- DEBUG(traceCandidate(Cand));
+ LLVM_DEBUG(traceCandidate(Cand));
}
}
}
@@ -3046,14 +3035,14 @@ SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
// See if BotCand is still valid (because we previously scheduled from Top).
- DEBUG(dbgs() << "Picking from Bot:\n");
+ LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
if (!BotCand.isValid() || BotCand.SU->isScheduled ||
BotCand.Policy != BotPolicy) {
BotCand.reset(CandPolicy());
pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
assert(BotCand.Reason != NoCand && "failed to find the first candidate");
} else {
- DEBUG(traceCandidate(BotCand));
+ LLVM_DEBUG(traceCandidate(BotCand));
#ifndef NDEBUG
if (VerifyScheduling) {
SchedCandidate TCand;
@@ -3066,14 +3055,14 @@ SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
}
// Check if the top Q has a better candidate.
- DEBUG(dbgs() << "Picking from Top:\n");
+ LLVM_DEBUG(dbgs() << "Picking from Top:\n");
if (!TopCand.isValid() || TopCand.SU->isScheduled ||
TopCand.Policy != TopPolicy) {
TopCand.reset(CandPolicy());
pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
assert(TopCand.Reason != NoCand && "failed to find the first candidate");
} else {
- DEBUG(traceCandidate(TopCand));
+ LLVM_DEBUG(traceCandidate(TopCand));
#ifndef NDEBUG
if (VerifyScheduling) {
SchedCandidate TCand;
@@ -3093,7 +3082,7 @@ SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
tryCandidate(Cand, TopCand, nullptr);
if (TopCand.Reason != NoCand) {
Cand.setBest(TopCand);
- DEBUG(traceCandidate(Cand));
+ LLVM_DEBUG(traceCandidate(Cand));
}
IsTopNode = Cand.AtTop;
@@ -3142,7 +3131,8 @@ SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
if (SU->isBottomReady())
Bot.removeReady(SU);
- DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
+ LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
+ << *SU->getInstr());
return SU;
}
@@ -3163,8 +3153,8 @@ void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
MachineInstr *Copy = DepSU->getInstr();
if (!Copy->isCopy())
continue;
- DEBUG(dbgs() << " Rescheduling physreg copy ";
- Dep.getSUnit()->dump(DAG));
+ LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
+ Dep.getSUnit()->dump(DAG));
DAG->moveInstruction(Copy, InsertPos);
}
}
@@ -3243,13 +3233,13 @@ void PostGenericScheduler::registerRoots() {
if (SU->getDepth() > Rem.CriticalPath)
Rem.CriticalPath = SU->getDepth();
}
- DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
+ LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
if (DumpCriticalPathLength) {
errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
}
}
-/// Apply a set of heursitics to a new candidate for PostRA scheduling.
+/// Apply a set of heuristics to a new candidate for PostRA scheduling.
///
/// \param Cand provides the policy and current best candidate.
/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
@@ -3301,7 +3291,7 @@ void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
tryCandidate(Cand, TryCand);
if (TryCand.Reason != NoCand) {
Cand.setBest(TryCand);
- DEBUG(traceCandidate(Cand));
+ LLVM_DEBUG(traceCandidate(Cand));
}
}
}
@@ -3333,7 +3323,8 @@ SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
IsTopNode = true;
Top.removeReady(SU);
- DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
+ LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
+ << *SU->getInstr());
return SU;
}
@@ -3355,7 +3346,7 @@ ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
namespace {
-/// \brief Order nodes by the ILP metric.
+/// Order nodes by the ILP metric.
struct ILPOrder {
const SchedDFSResult *DFSResult = nullptr;
const BitVector *ScheduledTrees = nullptr;
@@ -3363,7 +3354,7 @@ struct ILPOrder {
ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
- /// \brief Apply a less-than relation on node priority.
+ /// Apply a less-than relation on node priority.
///
/// (Return true if A comes after B in the Q.)
bool operator()(const SUnit *A, const SUnit *B) const {
@@ -3388,7 +3379,7 @@ struct ILPOrder {
}
};
-/// \brief Schedule based on the ILP metric.
+/// Schedule based on the ILP metric.
class ILPScheduler : public MachineSchedStrategy {
ScheduleDAGMILive *DAG = nullptr;
ILPOrder Cmp;
@@ -3422,16 +3413,19 @@ public:
SUnit *SU = ReadyQ.back();
ReadyQ.pop_back();
IsTopNode = false;
- DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") "
- << " ILP: " << DAG->getDFSResult()->getILP(SU)
- << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
- << DAG->getDFSResult()->getSubtreeLevel(
- DAG->getDFSResult()->getSubtreeID(SU)) << '\n'
- << "Scheduling " << *SU->getInstr());
+ LLVM_DEBUG(dbgs() << "Pick node "
+ << "SU(" << SU->NodeNum << ") "
+ << " ILP: " << DAG->getDFSResult()->getILP(SU)
+ << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
+ << " @"
+ << DAG->getDFSResult()->getSubtreeLevel(
+ DAG->getDFSResult()->getSubtreeID(SU))
+ << '\n'
+ << "Scheduling " << *SU->getInstr());
return SU;
}
- /// \brief Scheduler callback to notify that a new subtree is scheduled.
+ /// Scheduler callback to notify that a new subtree is scheduled.
void scheduleTree(unsigned SubtreeID) override {
std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
}
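Most of the churn in MachineScheduler.cpp above is the mechanical rename of the DEBUG macro to LLVM_DEBUG, plus the include shuffle (adding llvm/Config/llvm-config.h and moving MachineValueType.h under Support). A minimal sketch of the new macro's usage pattern is shown below; the debug-type name and the helper are illustrative, the macro behaviour is the documented one.

#define DEBUG_TYPE "my-pass" // illustrative; each pass defines its own
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

// LLVM_DEBUG compiles away in NDEBUG builds and is gated at run time by
// -debug-only=my-pass, the same behaviour the old DEBUG macro provided.
static void traceRegion(unsigned NumRegionInstrs) {
  LLVM_DEBUG(llvm::dbgs() << "RegionInstrs: " << NumRegionInstrs << '\n');
}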
diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp
index bedfdd84b1ca..354f46e9e625 100644
--- a/contrib/llvm/lib/CodeGen/MachineSink.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp
@@ -77,6 +77,7 @@ static cl::opt<unsigned> SplitEdgeProbabilityThreshold(
STATISTIC(NumSunk, "Number of machine instructions sunk");
STATISTIC(NumSplit, "Number of critical edges split");
STATISTIC(NumCoalesces, "Number of copies coalesced");
+STATISTIC(NumPostRACopySink, "Number of copies sunk after RA");
namespace {
@@ -138,7 +139,7 @@ namespace {
MachineBasicBlock *From,
MachineBasicBlock *To);
- /// \brief Postpone the splitting of the given critical
+ /// Postpone the splitting of the given critical
/// edge (\p From, \p To).
///
/// We do not split the edges on the fly. Indeed, this invalidates
@@ -210,8 +211,8 @@ bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI,
MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
if (DefMI->isCopyLike())
return false;
- DEBUG(dbgs() << "Coalescing: " << *DefMI);
- DEBUG(dbgs() << "*** to: " << MI);
+ LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
+ LLVM_DEBUG(dbgs() << "*** to: " << MI);
MRI->replaceRegWith(DstReg, SrcReg);
MI.eraseFromParent();
@@ -295,7 +296,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
- DEBUG(dbgs() << "******** Machine Sinking ********\n");
+ LLVM_DEBUG(dbgs() << "******** Machine Sinking ********\n");
TII = MF.getSubtarget().getInstrInfo();
TRI = MF.getSubtarget().getRegisterInfo();
@@ -322,14 +323,14 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
for (auto &Pair : ToSplit) {
auto NewSucc = Pair.first->SplitCriticalEdge(Pair.second, *this);
if (NewSucc != nullptr) {
- DEBUG(dbgs() << " *** Splitting critical edge: "
- << printMBBReference(*Pair.first) << " -- "
- << printMBBReference(*NewSucc) << " -- "
- << printMBBReference(*Pair.second) << '\n');
+ LLVM_DEBUG(dbgs() << " *** Splitting critical edge: "
+ << printMBBReference(*Pair.first) << " -- "
+ << printMBBReference(*NewSucc) << " -- "
+ << printMBBReference(*Pair.second) << '\n');
MadeChange = true;
++NumSplit;
} else
- DEBUG(dbgs() << " *** Not legal to break critical edge\n");
+ LLVM_DEBUG(dbgs() << " *** Not legal to break critical edge\n");
}
// If this iteration over the code changed anything, keep iterating.
if (!MadeChange) break;
@@ -371,7 +372,7 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
if (!ProcessedBegin)
--I;
- if (MI.isDebugValue())
+ if (MI.isDebugInstr())
continue;
bool Joined = PerformTrivialForwardCoalescing(MI, &MBB);
@@ -708,7 +709,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
return SuccToSinkTo;
}
-/// \brief Return true if MI is likely to be usable as a memory operation by the
+/// Return true if MI is likely to be usable as a memory operation by the
/// implicit null check optimization.
///
/// This is a "best effort" heuristic, and should not be relied upon for
@@ -752,6 +753,37 @@ static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI,
MBP.LHS.getReg() == BaseReg;
}
+/// Sink an instruction and its associated debug instructions.
+static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo,
+ MachineBasicBlock::iterator InsertPos) {
+ // Collect matching debug values.
+ SmallVector<MachineInstr *, 2> DbgValuesToSink;
+ collectDebugValues(MI, DbgValuesToSink);
+
+ // If we cannot find a location to use (merge with), then we erase the debug
+ // location to prevent debug-info driven tools from potentially reporting
+ // wrong location information.
+ if (!SuccToSinkTo.empty() && InsertPos != SuccToSinkTo.end())
+ MI.setDebugLoc(DILocation::getMergedLocation(MI.getDebugLoc(),
+ InsertPos->getDebugLoc()));
+ else
+ MI.setDebugLoc(DebugLoc());
+
+ // Move the instruction.
+ MachineBasicBlock *ParentBlock = MI.getParent();
+ SuccToSinkTo.splice(InsertPos, ParentBlock, MI,
+ ++MachineBasicBlock::iterator(MI));
+
+ // Move previously adjacent debug value instructions to the insert position.
+ for (SmallVectorImpl<MachineInstr *>::iterator DBI = DbgValuesToSink.begin(),
+ DBE = DbgValuesToSink.end();
+ DBI != DBE; ++DBI) {
+ MachineInstr *DbgMI = *DBI;
+ SuccToSinkTo.splice(InsertPos, ParentBlock, DbgMI,
+ ++MachineBasicBlock::iterator(DbgMI));
+ }
+}
+
/// SinkInstruction - Determine whether it is safe to sink the specified machine
/// instruction out of its current block into a successor.
bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
@@ -803,7 +835,7 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
return false;
}
- DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccToSinkTo);
+ LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccToSinkTo);
// If the block has multiple predecessors, this is a critical edge.
// Decide if we can sink along it or need to break the edge.
@@ -813,26 +845,26 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
bool TryBreak = false;
bool store = true;
if (!MI.isSafeToMove(AA, store)) {
- DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
+ LLVM_DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
TryBreak = true;
}
// We don't want to sink across a critical edge if we don't dominate the
// successor. We could be introducing calculations to new code paths.
if (!TryBreak && !DT->dominates(ParentBlock, SuccToSinkTo)) {
- DEBUG(dbgs() << " *** NOTE: Critical edge found\n");
+ LLVM_DEBUG(dbgs() << " *** NOTE: Critical edge found\n");
TryBreak = true;
}
// Don't sink instructions into a loop.
if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) {
- DEBUG(dbgs() << " *** NOTE: Loop header found\n");
+ LLVM_DEBUG(dbgs() << " *** NOTE: Loop header found\n");
TryBreak = true;
}
// Otherwise we are OK with sinking along a critical edge.
if (!TryBreak)
- DEBUG(dbgs() << "Sinking along critical edge.\n");
+ LLVM_DEBUG(dbgs() << "Sinking along critical edge.\n");
else {
// Mark this edge as to be split.
// If the edge can actually be split, the next iteration of the main loop
@@ -840,8 +872,8 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
bool Status =
PostponeSplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge);
if (!Status)
- DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
- "break critical edge\n");
+ LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
+ "break critical edge\n");
// The instruction will not be sunk this time.
return false;
}
@@ -854,8 +886,8 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
bool Status = PostponeSplitCriticalEdge(MI, ParentBlock,
SuccToSinkTo, BreakPHIEdge);
if (!Status)
- DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
- "break critical edge\n");
+ LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
+ "break critical edge\n");
// The instruction will not be sunk this time.
return false;
}
@@ -865,30 +897,7 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI())
++InsertPos;
- // collect matching debug values.
- SmallVector<MachineInstr *, 2> DbgValuesToSink;
- collectDebugValues(MI, DbgValuesToSink);
-
- // Merge or erase debug location to ensure consistent stepping in profilers
- // and debuggers.
- if (!SuccToSinkTo->empty() && InsertPos != SuccToSinkTo->end())
- MI.setDebugLoc(DILocation::getMergedLocation(MI.getDebugLoc(),
- InsertPos->getDebugLoc()));
- else
- MI.setDebugLoc(DebugLoc());
-
-
- // Move the instruction.
- SuccToSinkTo->splice(InsertPos, ParentBlock, MI,
- ++MachineBasicBlock::iterator(MI));
-
- // Move previously adjacent debug value instructions to the insert position.
- for (SmallVectorImpl<MachineInstr *>::iterator DBI = DbgValuesToSink.begin(),
- DBE = DbgValuesToSink.end(); DBI != DBE; ++DBI) {
- MachineInstr *DbgMI = *DBI;
- SuccToSinkTo->splice(InsertPos, ParentBlock, DbgMI,
- ++MachineBasicBlock::iterator(DbgMI));
- }
+ performSink(MI, *SuccToSinkTo, InsertPos);
// Conservatively, clear any kill flags, since it's possible that they are no
// longer correct.
@@ -902,3 +911,282 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
return true;
}
+
+//===----------------------------------------------------------------------===//
+// This pass is not intended to be a replacement or a complete alternative
+// for the pre-RA machine sink pass. It is only designed to sink COPY
+// instructions which should be handled after RA.
+//
+// This pass sinks COPY instructions into a successor block, if the COPY is not
+// used in the current block and the COPY is live-in to a single successor
+// (i.e., doesn't require the COPY to be duplicated). This avoids executing the
+// copy on paths where its result isn't needed. This also exposes additional
+// opportunities for dead copy elimination and shrink wrapping.
+//
+// These copies were either not handled by or are inserted after the MachineSink
+// pass. As an example of the former case, the MachineSink pass cannot sink
+// COPY instructions with allocatable source registers; on AArch64, copies of
+// this kind are frequently used to move function parameters (PhysReg) into
+// virtual registers in the entry block.
+//
+// For the machine IR below, this pass will sink the COPY defining %w19 in the
+// entry block into its successor (%bb.1), because %w19 is live-in only to %bb.1.
+// %bb.0:
+// %wzr = SUBSWri %w1, 1
+// %w19 = COPY %w0
+// Bcc 11, %bb.2
+// %bb.1:
+// Live Ins: %w19
+// BL @fun
+// %w0 = ADDWrr %w0, %w19
+// RET %w0
+// %bb.2:
+// %w0 = COPY %wzr
+// RET %w0
+// As we sink %w19 (CSR in AArch64) into %bb.1, the shrink-wrapping pass will be
+// able to see %bb.0 as a candidate.
+//===----------------------------------------------------------------------===//
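The comment block above explains when the new pass is useful; wiring it up only requires the pass ID defined a few lines further down. Here is a hedged sketch of how a target pipeline might schedule it after register allocation; the helper is made up, while llvm::PostRAMachineSinkingID is the symbol this diff introduces.

#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"

// Illustrative wiring only: run the copy-sinking pass once registers are
// assigned, so stray COPYs move out of the entry block before shrink wrapping.
static void addPostRACopySink(llvm::TargetPassConfig &PC) {
  PC.addPass(&llvm::PostRAMachineSinkingID);
}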
+namespace {
+
+class PostRAMachineSinking : public MachineFunctionPass {
+public:
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ static char ID;
+ PostRAMachineSinking() : MachineFunctionPass(ID) {}
+ StringRef getPassName() const override { return "PostRA Machine Sink"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+private:
+ /// Track which register units have been modified and used.
+ LiveRegUnits ModifiedRegUnits, UsedRegUnits;
+
+ /// Sink Copy instructions unused in the same block close to their uses in
+ /// successors.
+ bool tryToSinkCopy(MachineBasicBlock &BB, MachineFunction &MF,
+ const TargetRegisterInfo *TRI, const TargetInstrInfo *TII);
+};
+} // namespace
+
+char PostRAMachineSinking::ID = 0;
+char &llvm::PostRAMachineSinkingID = PostRAMachineSinking::ID;
+
+INITIALIZE_PASS(PostRAMachineSinking, "postra-machine-sink",
+ "PostRA Machine Sink", false, false)
+
+static bool aliasWithRegsInLiveIn(MachineBasicBlock &MBB, unsigned Reg,
+ const TargetRegisterInfo *TRI) {
+ LiveRegUnits LiveInRegUnits(*TRI);
+ LiveInRegUnits.addLiveIns(MBB);
+ return !LiveInRegUnits.available(Reg);
+}
+
+static MachineBasicBlock *
+getSingleLiveInSuccBB(MachineBasicBlock &CurBB,
+ const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs,
+ unsigned Reg, const TargetRegisterInfo *TRI) {
+ // Try to find a single sinkable successor in which Reg is live-in.
+ MachineBasicBlock *BB = nullptr;
+ for (auto *SI : SinkableBBs) {
+ if (aliasWithRegsInLiveIn(*SI, Reg, TRI)) {
+ // If BB is set here, Reg is live-in to at least two sinkable successors,
+ // so quit.
+ if (BB)
+ return nullptr;
+ BB = SI;
+ }
+ }
+ // Reg is not live-in to any sinkable successors.
+ if (!BB)
+ return nullptr;
+
+ // Check if any register aliased with Reg is live-in in other successors.
+ for (auto *SI : CurBB.successors()) {
+ if (!SinkableBBs.count(SI) && aliasWithRegsInLiveIn(*SI, Reg, TRI))
+ return nullptr;
+ }
+ return BB;
+}
+
+static MachineBasicBlock *
+getSingleLiveInSuccBB(MachineBasicBlock &CurBB,
+ const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs,
+ ArrayRef<unsigned> DefedRegsInCopy,
+ const TargetRegisterInfo *TRI) {
+ MachineBasicBlock *SingleBB = nullptr;
+ for (auto DefReg : DefedRegsInCopy) {
+ MachineBasicBlock *BB =
+ getSingleLiveInSuccBB(CurBB, SinkableBBs, DefReg, TRI);
+ if (!BB || (SingleBB && SingleBB != BB))
+ return nullptr;
+ SingleBB = BB;
+ }
+ return SingleBB;
+}
+
+static void clearKillFlags(MachineInstr *MI, MachineBasicBlock &CurBB,
+ SmallVectorImpl<unsigned> &UsedOpsInCopy,
+ LiveRegUnits &UsedRegUnits,
+ const TargetRegisterInfo *TRI) {
+ for (auto U : UsedOpsInCopy) {
+ MachineOperand &MO = MI->getOperand(U);
+ unsigned SrcReg = MO.getReg();
+ if (!UsedRegUnits.available(SrcReg)) {
+ MachineBasicBlock::iterator NI = std::next(MI->getIterator());
+ for (MachineInstr &UI : make_range(NI, CurBB.end())) {
+ if (UI.killsRegister(SrcReg, TRI)) {
+ UI.clearRegisterKills(SrcReg, TRI);
+ MO.setIsKill(true);
+ break;
+ }
+ }
+ }
+ }
+}
+
+static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB,
+ SmallVectorImpl<unsigned> &UsedOpsInCopy,
+ SmallVectorImpl<unsigned> &DefedRegsInCopy) {
+ for (auto DefReg : DefedRegsInCopy)
+ SuccBB->removeLiveIn(DefReg);
+ for (auto U : UsedOpsInCopy) {
+ unsigned Reg = MI->getOperand(U).getReg();
+ if (!SuccBB->isLiveIn(Reg))
+ SuccBB->addLiveIn(Reg);
+ }
+}
+
+static bool hasRegisterDependency(MachineInstr *MI,
+ SmallVectorImpl<unsigned> &UsedOpsInCopy,
+ SmallVectorImpl<unsigned> &DefedRegsInCopy,
+ LiveRegUnits &ModifiedRegUnits,
+ LiveRegUnits &UsedRegUnits) {
+ bool HasRegDependency = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef()) {
+ if (!ModifiedRegUnits.available(Reg) || !UsedRegUnits.available(Reg)) {
+ HasRegDependency = true;
+ break;
+ }
+ DefedRegsInCopy.push_back(Reg);
+
+ // FIXME: instead of isUse(), readsReg() would be a better fix here.
+ // For example, we can ignore modifications in a reg with undef. However,
+ // it's not perfectly clear if skipping the internal read is safe in all
+ // other targets.
+ } else if (MO.isUse()) {
+ if (!ModifiedRegUnits.available(Reg)) {
+ HasRegDependency = true;
+ break;
+ }
+ UsedOpsInCopy.push_back(i);
+ }
+ }
+ return HasRegDependency;
+}
+
+bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
+ MachineFunction &MF,
+ const TargetRegisterInfo *TRI,
+ const TargetInstrInfo *TII) {
+ SmallPtrSet<MachineBasicBlock *, 2> SinkableBBs;
+ // FIXME: For now, we sink only to a successor which has a single predecessor
+ // so that we can directly sink COPY instructions to the successor without
+ // adding any new block or branch instruction.
+ for (MachineBasicBlock *SI : CurBB.successors())
+ if (!SI->livein_empty() && SI->pred_size() == 1)
+ SinkableBBs.insert(SI);
+
+ if (SinkableBBs.empty())
+ return false;
+
+ bool Changed = false;
+
+ // Track which registers have been modified and used between the end of the
+ // block and the current instruction.
+ ModifiedRegUnits.clear();
+ UsedRegUnits.clear();
+
+ for (auto I = CurBB.rbegin(), E = CurBB.rend(); I != E;) {
+ MachineInstr *MI = &*I;
+ ++I;
+
+ if (MI->isDebugInstr())
+ continue;
+
+ // Do not move any instruction across a function call.
+ if (MI->isCall())
+ return false;
+
+ if (!MI->isCopy() || !MI->getOperand(0).isRenamable()) {
+ LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits,
+ TRI);
+ continue;
+ }
+
+ // Track the indexes of the operands used by the COPY.
+ SmallVector<unsigned, 2> UsedOpsInCopy;
+ // Track the registers defined by the COPY.
+ SmallVector<unsigned, 2> DefedRegsInCopy;
+
+ // Don't sink the COPY if it would violate a register dependency.
+ if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy,
+ ModifiedRegUnits, UsedRegUnits)) {
+ LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits,
+ TRI);
+ continue;
+ }
+ assert((!UsedOpsInCopy.empty() && !DefedRegsInCopy.empty()) &&
+ "Unexpected SrcReg or DefReg");
+ MachineBasicBlock *SuccBB =
+ getSingleLiveInSuccBB(CurBB, SinkableBBs, DefedRegsInCopy, TRI);
+ // Don't sink if we cannot find a single sinkable successor in which Reg
+ // is live-in.
+ if (!SuccBB) {
+ LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits,
+ TRI);
+ continue;
+ }
+ assert((SuccBB->pred_size() == 1 && *SuccBB->pred_begin() == &CurBB) &&
+ "Unexpected predecessor");
+
+ // Clear the kill flag if SrcReg is killed between MI and the end of the
+ // block.
+ clearKillFlags(MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
+ MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI();
+ performSink(*MI, *SuccBB, InsertPos);
+ updateLiveIn(MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);
+
+ Changed = true;
+ ++NumPostRACopySink;
+ }
+ return Changed;
+}
+
+bool PostRAMachineSinking::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = false;
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+
+ ModifiedRegUnits.init(*TRI);
+ UsedRegUnits.init(*TRI);
+ for (auto &BB : MF)
+ Changed |= tryToSinkCopy(BB, MF, TRI, TII);
+
+ return Changed;
+}
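
The new pass relies entirely on LiveRegUnits bookkeeping rather than a separate liveness analysis. Below is a minimal standalone sketch of that pattern (not part of the patch; the helper name isAvailableBelow is made up for illustration): walk a block bottom-up, accumulate defs and uses with LiveRegUnits::accumulateUsedDefed, and treat a physical register as safe to sink past the scanned range only while both sets still report it as available.

#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"

using namespace llvm;

// Hypothetical helper: is PhysReg neither modified nor read between MI and
// the end of its block? This mirrors the ModifiedRegUnits/UsedRegUnits scan
// in PostRAMachineSinking::tryToSinkCopy above.
static bool isAvailableBelow(const MachineInstr &MI, unsigned PhysReg,
                             const TargetRegisterInfo *TRI) {
  const MachineBasicBlock &MBB = *MI.getParent();
  LiveRegUnits ModifiedRegUnits(*TRI);
  LiveRegUnits UsedRegUnits(*TRI);
  // Scan bottom-up, stopping once we reach MI itself.
  for (auto I = MBB.rbegin(), E = MBB.rend(); I != E && &*I != &MI; ++I)
    LiveRegUnits::accumulateUsedDefed(*I, ModifiedRegUnits, UsedRegUnits, TRI);
  // A register unit defined or read below MI makes PhysReg "unavailable",
  // i.e. unsafe to sink past those instructions.
  return ModifiedRegUnits.available(PhysReg) && UsedRegUnits.available(PhysReg);
}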
diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
index d81c6f8a31e1..b444cd31eba2 100644
--- a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -70,7 +70,7 @@ bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
TRI = ST.getRegisterInfo();
MRI = &MF->getRegInfo();
Loops = &getAnalysis<MachineLoopInfo>();
- SchedModel.init(ST.getSchedModel(), &ST, TII);
+ SchedModel.init(&ST);
BlockInfo.resize(MF->getNumBlockIDs());
ProcResourceCycles.resize(MF->getNumBlockIDs() *
SchedModel.getNumProcResourceKinds());
@@ -396,8 +396,8 @@ MachineTraceMetrics::getEnsemble(MachineTraceMetrics::Strategy strategy) {
}
void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) {
- DEBUG(dbgs() << "Invalidate traces through " << printMBBReference(*MBB)
- << '\n');
+ LLVM_DEBUG(dbgs() << "Invalidate traces through " << printMBBReference(*MBB)
+ << '\n');
BlockInfo[MBB->getNumber()].invalidate();
for (unsigned i = 0; i != TS_NumStrategies; ++i)
if (Ensembles[i])
@@ -477,8 +477,8 @@ public:
/// Compute the trace through MBB.
void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
- DEBUG(dbgs() << "Computing " << getName() << " trace through "
- << printMBBReference(*MBB) << '\n');
+ LLVM_DEBUG(dbgs() << "Computing " << getName() << " trace through "
+ << printMBBReference(*MBB) << '\n');
// Set up loop bounds for the backwards post-order traversal.
LoopBounds Bounds(BlockInfo, MTM.Loops);
@@ -486,11 +486,11 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
Bounds.Downward = false;
Bounds.Visited.clear();
for (auto I : inverse_post_order_ext(MBB, Bounds)) {
- DEBUG(dbgs() << " pred for " << printMBBReference(*I) << ": ");
+ LLVM_DEBUG(dbgs() << " pred for " << printMBBReference(*I) << ": ");
TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
// All the predecessors have been visited, pick the preferred one.
TBI.Pred = pickTracePred(I);
- DEBUG({
+ LLVM_DEBUG({
if (TBI.Pred)
dbgs() << printMBBReference(*TBI.Pred) << '\n';
else
@@ -504,11 +504,11 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
Bounds.Downward = true;
Bounds.Visited.clear();
for (auto I : post_order_ext(MBB, Bounds)) {
- DEBUG(dbgs() << " succ for " << printMBBReference(*I) << ": ");
+ LLVM_DEBUG(dbgs() << " succ for " << printMBBReference(*I) << ": ");
TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
// All the successors have been visited, pick the preferred one.
TBI.Succ = pickTraceSucc(I);
- DEBUG({
+ LLVM_DEBUG({
if (TBI.Succ)
dbgs() << printMBBReference(*TBI.Succ) << '\n';
else
@@ -531,8 +531,8 @@ MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {
WorkList.push_back(BadMBB);
do {
const MachineBasicBlock *MBB = WorkList.pop_back_val();
- DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
- << getName() << " height.\n");
+ LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
+ << getName() << " height.\n");
// Find any MBB predecessors that have MBB as their preferred successor.
// They are the only ones that need to be invalidated.
for (const MachineBasicBlock *Pred : MBB->predecessors()) {
@@ -556,8 +556,8 @@ MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {
WorkList.push_back(BadMBB);
do {
const MachineBasicBlock *MBB = WorkList.pop_back_val();
- DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
- << getName() << " depth.\n");
+ LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
+ << getName() << " depth.\n");
// Find any MBB successors that have MBB as their preferred predecessor.
// They are the only ones that need to be invalidated.
for (const MachineBasicBlock *Succ : MBB->successors()) {
@@ -653,7 +653,7 @@ static bool getDataDeps(const MachineInstr &UseMI,
SmallVectorImpl<DataDep> &Deps,
const MachineRegisterInfo *MRI) {
// Debug values should not be included in any calculations.
- if (UseMI.isDebugValue())
+ if (UseMI.isDebugInstr())
return false;
bool HasPhysRegs = false;
@@ -813,9 +813,9 @@ updateDepth(MachineTraceMetrics::TraceBlockInfo &TBI, const MachineInstr &UseMI,
if (TBI.HasValidInstrHeights) {
// Update critical path length.
TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
- DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI);
+ LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI);
} else {
- DEBUG(dbgs() << Cycle << '\t' << UseMI);
+ LLVM_DEBUG(dbgs() << Cycle << '\t' << UseMI);
}
}
@@ -860,13 +860,13 @@ computeInstrDepths(const MachineBasicBlock *MBB) {
// Go through trace blocks in top-down order, stopping after the center block.
while (!Stack.empty()) {
MBB = Stack.pop_back_val();
- DEBUG(dbgs() << "\nDepths for " << printMBBReference(*MBB) << ":\n");
+ LLVM_DEBUG(dbgs() << "\nDepths for " << printMBBReference(*MBB) << ":\n");
TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
TBI.HasValidInstrDepths = true;
TBI.CriticalPath = 0;
// Print out resource depths here as well.
- DEBUG({
+ LLVM_DEBUG({
dbgs() << format("%7u Instructions\n", TBI.InstrDepth);
ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());
for (unsigned K = 0; K != PRDepths.size(); ++K)
@@ -1045,12 +1045,12 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
SmallVector<DataDep, 8> Deps;
for (;!Stack.empty(); Stack.pop_back()) {
MBB = Stack.back();
- DEBUG(dbgs() << "Heights for " << printMBBReference(*MBB) << ":\n");
+ LLVM_DEBUG(dbgs() << "Heights for " << printMBBReference(*MBB) << ":\n");
TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
TBI.HasValidInstrHeights = true;
TBI.CriticalPath = 0;
- DEBUG({
+ LLVM_DEBUG({
dbgs() << format("%7u Instructions\n", TBI.InstrHeight);
ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());
for (unsigned K = 0; K != PRHeights.size(); ++K)
@@ -1081,7 +1081,7 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
if (!Deps.empty()) {
// Loop header PHI heights are all 0.
unsigned Height = TBI.Succ ? Cycles.lookup(&PHI).Height : 0;
- DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI);
+ LLVM_DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI);
if (pushDepHeight(Deps.front(), PHI, Height, Heights, MTM.SchedModel,
MTM.TII))
addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
@@ -1122,38 +1122,38 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
InstrCycles &MICycles = Cycles[&MI];
MICycles.Height = Cycle;
if (!TBI.HasValidInstrDepths) {
- DEBUG(dbgs() << Cycle << '\t' << MI);
+ LLVM_DEBUG(dbgs() << Cycle << '\t' << MI);
continue;
}
// Update critical path length.
TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth);
- DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << MI);
+ LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << MI);
}
// Update virtual live-in heights. They were added by addLiveIns() with a 0
// height because the final height isn't known until now.
- DEBUG(dbgs() << printMBBReference(*MBB) << " Live-ins:");
+ LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " Live-ins:");
for (LiveInReg &LIR : TBI.LiveIns) {
const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
LIR.Height = Heights.lookup(DefMI);
- DEBUG(dbgs() << ' ' << printReg(LIR.Reg) << '@' << LIR.Height);
+ LLVM_DEBUG(dbgs() << ' ' << printReg(LIR.Reg) << '@' << LIR.Height);
}
// Transfer the live regunits to the live-in list.
for (SparseSet<LiveRegUnit>::const_iterator
RI = RegUnits.begin(), RE = RegUnits.end(); RI != RE; ++RI) {
TBI.LiveIns.push_back(LiveInReg(RI->RegUnit, RI->Cycle));
- DEBUG(dbgs() << ' ' << printRegUnit(RI->RegUnit, MTM.TRI)
- << '@' << RI->Cycle);
+ LLVM_DEBUG(dbgs() << ' ' << printRegUnit(RI->RegUnit, MTM.TRI) << '@'
+ << RI->Cycle);
}
- DEBUG(dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << '\n');
if (!TBI.HasValidInstrDepths)
continue;
// Add live-ins to the critical path length.
TBI.CriticalPath = std::max(TBI.CriticalPath,
computeCrossBlockCriticalPath(TBI));
- DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n');
+ LLVM_DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n');
}
}
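
Most of this file's changes are the mechanical DEBUG -> LLVM_DEBUG rename. For reference, a minimal sketch of the two macro forms used in the hunks above, assuming an assertions-enabled build and this file's "machine-trace-metrics" DEBUG_TYPE (output then appears under -debug or -debug-only=machine-trace-metrics); the function name is invented for the example:

#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "machine-trace-metrics"

// Hypothetical free function showing both LLVM_DEBUG forms; the statements
// compile away entirely in NDEBUG builds.
static void reportCriticalPath(unsigned CriticalPath) {
  // Single-statement form.
  LLVM_DEBUG(llvm::dbgs() << "Critical path: " << CriticalPath << '\n');
  // Block form, for dumps that need several statements.
  LLVM_DEBUG({
    llvm::dbgs() << "Critical path details:\n";
    llvm::dbgs() << "  length = " << CriticalPath << '\n';
  });
}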
diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
index e0cc2ca9a2a2..d644e41abc5b 100644
--- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -239,7 +239,8 @@ namespace {
void report(const char *msg, const MachineFunction *MF);
void report(const char *msg, const MachineBasicBlock *MBB);
void report(const char *msg, const MachineInstr *MI);
- void report(const char *msg, const MachineOperand *MO, unsigned MONum);
+ void report(const char *msg, const MachineOperand *MO, unsigned MONum,
+ LLT MOVRegType = LLT{});
void report_context(const LiveInterval &LI) const;
void report_context(const LiveRange &LR, unsigned VRegUnit,
@@ -250,16 +251,16 @@ namespace {
void report_context_liverange(const LiveRange &LR) const;
void report_context_lanemask(LaneBitmask LaneMask) const;
void report_context_vreg(unsigned VReg) const;
- void report_context_vreg_regunit(unsigned VRegOrRegUnit) const;
+ void report_context_vreg_regunit(unsigned VRegOrUnit) const;
void verifyInlineAsm(const MachineInstr *MI);
void checkLiveness(const MachineOperand *MO, unsigned MONum);
void checkLivenessAtUse(const MachineOperand *MO, unsigned MONum,
- SlotIndex UseIdx, const LiveRange &LR, unsigned Reg,
+ SlotIndex UseIdx, const LiveRange &LR, unsigned VRegOrUnit,
LaneBitmask LaneMask = LaneBitmask::getNone());
void checkLivenessAtDef(const MachineOperand *MO, unsigned MONum,
- SlotIndex DefIdx, const LiveRange &LR, unsigned Reg,
+ SlotIndex DefIdx, const LiveRange &LR, unsigned VRegOrUnit,
LaneBitmask LaneMask = LaneBitmask::getNone());
void markReachable(const MachineBasicBlock *MBB);
@@ -359,11 +360,15 @@ unsigned MachineVerifier::verify(MachineFunction &MF) {
TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
- isFunctionRegBankSelected = MF.getProperties().hasProperty(
- MachineFunctionProperties::Property::RegBankSelected);
- isFunctionSelected = MF.getProperties().hasProperty(
- MachineFunctionProperties::Property::Selected);
-
+ const bool isFunctionFailedISel = MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel);
+ isFunctionRegBankSelected =
+ !isFunctionFailedISel &&
+ MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::RegBankSelected);
+ isFunctionSelected = !isFunctionFailedISel &&
+ MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::Selected);
LiveVars = nullptr;
LiveInts = nullptr;
LiveStks = nullptr;
@@ -486,15 +491,14 @@ void MachineVerifier::report(const char *msg, const MachineInstr *MI) {
if (Indexes && Indexes->hasIndex(*MI))
errs() << Indexes->getInstructionIndex(*MI) << '\t';
MI->print(errs(), /*SkipOpers=*/true);
- errs() << '\n';
}
-void MachineVerifier::report(const char *msg,
- const MachineOperand *MO, unsigned MONum) {
+void MachineVerifier::report(const char *msg, const MachineOperand *MO,
+ unsigned MONum, LLT MOVRegType) {
assert(MO);
report(msg, MO->getParent());
errs() << "- operand " << MONum << ": ";
- MO->print(errs(), TRI);
+ MO->print(errs(), MOVRegType, TRI);
errs() << "\n";
}
@@ -642,7 +646,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
!(AsmInfo &&
AsmInfo->getExceptionHandlingType() == ExceptionHandling::SjLj &&
BB && isa<SwitchInst>(BB->getTerminator())) &&
- !isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
+ !isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
report("MBB has more than one landing pad successor", MBB);
// Call AnalyzeBranch. If it succeeds, there are several more conditions to check.
@@ -873,11 +877,11 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
if (MI->getNumOperands() < MCID.getNumOperands()) {
report("Too few operands", MI);
errs() << MCID.getNumOperands() << " operands expected, but "
- << MI->getNumOperands() << " given.\n";
+ << MI->getNumOperands() << " given.\n";
}
if (MI->isPHI() && MF->getProperties().hasProperty(
- MachineFunctionProperties::Property::NoPHIs))
+ MachineFunctionProperties::Property::NoPHIs))
report("Found PHI instruction with NoPHIs property set", MI);
// Check the tied operands.
@@ -886,7 +890,8 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
// Check the MachineMemOperands for basic consistency.
for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
- E = MI->memoperands_end(); I != E; ++I) {
+ E = MI->memoperands_end();
+ I != E; ++I) {
if ((*I)->isLoad() && !MI->mayLoad())
report("Missing mayLoad flag", MI);
if ((*I)->isStore() && !MI->mayStore())
@@ -897,7 +902,7 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
// Other instructions must have one, unless they are inside a bundle.
if (LiveInts) {
bool mapped = !LiveInts->isNotInMIMap(*MI);
- if (MI->isDebugValue()) {
+ if (MI->isDebugInstr()) {
if (mapped)
report("Debug instruction has a slot index", MI);
} else if (MI->isInsideBundle()) {
@@ -909,32 +914,42 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
}
}
- // Check types.
if (isPreISelGenericOpcode(MCID.getOpcode())) {
if (isFunctionSelected)
report("Unexpected generic instruction in a Selected function", MI);
- // Generic instructions specify equality constraints between some
- // of their operands. Make sure these are consistent.
+ // Check types.
SmallVector<LLT, 4> Types;
- for (unsigned i = 0; i < MCID.getNumOperands(); ++i) {
- if (!MCID.OpInfo[i].isGenericType())
+ for (unsigned I = 0; I < MCID.getNumOperands(); ++I) {
+ if (!MCID.OpInfo[I].isGenericType())
continue;
- size_t TypeIdx = MCID.OpInfo[i].getGenericTypeIndex();
+ // Generic instructions specify type equality constraints between some of
+ // their operands. Make sure these are consistent.
+ size_t TypeIdx = MCID.OpInfo[I].getGenericTypeIndex();
Types.resize(std::max(TypeIdx + 1, Types.size()));
- LLT OpTy = MRI->getType(MI->getOperand(i).getReg());
- if (Types[TypeIdx].isValid() && Types[TypeIdx] != OpTy)
- report("type mismatch in generic instruction", MI);
- Types[TypeIdx] = OpTy;
+ const MachineOperand *MO = &MI->getOperand(I);
+ LLT OpTy = MRI->getType(MO->getReg());
+ // Don't report a type mismatch if there is no actual mismatch, only a
+ // type missing, to reduce noise:
+ if (OpTy.isValid()) {
+ // Only the first valid type for a type index will be printed: don't
+ // overwrite it later so it's always clear which type was expected:
+ if (!Types[TypeIdx].isValid())
+ Types[TypeIdx] = OpTy;
+ else if (Types[TypeIdx] != OpTy)
+ report("Type mismatch in generic instruction", MO, I, OpTy);
+ } else {
+ // Generic instructions must have types attached to their operands.
+ report("Generic instruction is missing a virtual register type", MO, I);
+ }
}
- }
- // Generic opcodes must not have physical register operands.
- if (isPreISelGenericOpcode(MCID.getOpcode())) {
- for (auto &Op : MI->operands()) {
- if (Op.isReg() && TargetRegisterInfo::isPhysicalRegister(Op.getReg()))
- report("Generic instruction cannot have physical register", MI);
+ // Generic opcodes must not have physical register operands.
+ for (unsigned I = 0; I < MI->getNumOperands(); ++I) {
+ const MachineOperand *MO = &MI->getOperand(I);
+ if (MO->isReg() && TargetRegisterInfo::isPhysicalRegister(MO->getReg()))
+ report("Generic instruction cannot have physical register", MO, I);
}
}
@@ -971,6 +986,88 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
MI);
break;
}
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_TRUNC:
+ case TargetOpcode::G_FPEXT:
+ case TargetOpcode::G_FPTRUNC: {
+ // The number of operands and the presence of types were already checked
+ // (and reported in case of any issues), so there is no need to report them
+ // again. However, since we try to report as many issues as possible at
+ // once, the instructions aren't guaranteed to have the right number of
+ // operands or types attached to them at this point.
+ assert(MCID.getNumOperands() == 2 && "Expected 2 operands G_*{EXT,TRUNC}");
+ if (MI->getNumOperands() < MCID.getNumOperands())
+ break;
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
+ if (!DstTy.isValid() || !SrcTy.isValid())
+ break;
+
+ LLT DstElTy = DstTy.isVector() ? DstTy.getElementType() : DstTy;
+ LLT SrcElTy = SrcTy.isVector() ? SrcTy.getElementType() : SrcTy;
+ if (DstElTy.isPointer() || SrcElTy.isPointer())
+ report("Generic extend/truncate can not operate on pointers", MI);
+
+ if (DstTy.isVector() != SrcTy.isVector()) {
+ report("Generic extend/truncate must be all-vector or all-scalar", MI);
+ // Generally we try to report as many issues as possible at once, but in
+ // this case it's not clear what we should be comparing the size of the
+ // scalar with: the size of the whole vector or the size of one lane.
+ // Instead of making an arbitrary choice and emitting a not-so-helpful
+ // message, let's avoid the extra noise and stop here.
+ break;
+ }
+ if (DstTy.isVector() && DstTy.getNumElements() != SrcTy.getNumElements())
+ report("Generic vector extend/truncate must preserve number of lanes",
+ MI);
+ unsigned DstSize = DstElTy.getSizeInBits();
+ unsigned SrcSize = SrcElTy.getSizeInBits();
+ switch (MI->getOpcode()) {
+ default:
+ if (DstSize <= SrcSize)
+ report("Generic extend has destination type no larger than source", MI);
+ break;
+ case TargetOpcode::G_TRUNC:
+ case TargetOpcode::G_FPTRUNC:
+ if (DstSize >= SrcSize)
+ report("Generic truncate has destination type no smaller than source",
+ MI);
+ break;
+ }
+ break;
+ }
+ case TargetOpcode::COPY: {
+ if (foundErrors)
+ break;
+ const MachineOperand &DstOp = MI->getOperand(0);
+ const MachineOperand &SrcOp = MI->getOperand(1);
+ LLT DstTy = MRI->getType(DstOp.getReg());
+ LLT SrcTy = MRI->getType(SrcOp.getReg());
+ if (SrcTy.isValid() && DstTy.isValid()) {
+ // If both types are valid, check that the types are the same.
+ if (SrcTy != DstTy) {
+ report("Copy Instruction is illegal with mismatching types", MI);
+ errs() << "Def = " << DstTy << ", Src = " << SrcTy << "\n";
+ }
+ }
+ if (SrcTy.isValid() || DstTy.isValid()) {
+ // If at least one of them has a valid type, just check that they have
+ // the same size.
+ unsigned SrcSize = TRI->getRegSizeInBits(SrcOp.getReg(), *MRI);
+ unsigned DstSize = TRI->getRegSizeInBits(DstOp.getReg(), *MRI);
+ assert(SrcSize && "Expecting size here");
+ assert(DstSize && "Expecting size here");
+ if (SrcSize != DstSize)
+ if (!DstOp.getSubReg() && !SrcOp.getSubReg()) {
+ report("Copy Instruction is illegal with mismatching sizes", MI);
+ errs() << "Def Size = " << DstSize << ", Src Size = " << SrcSize
+ << "\n";
+ }
+ }
+ break;
+ }
case TargetOpcode::STATEPOINT:
if (!MI->getOperand(StatepointOpers::IDPos).isImm() ||
!MI->getOperand(StatepointOpers::NBytesPos).isImm() ||
@@ -1101,12 +1198,14 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
}
}
}
- if (MO->isRenamable() &&
- ((MO->isDef() && MI->hasExtraDefRegAllocReq()) ||
- (MO->isUse() && MI->hasExtraSrcRegAllocReq()))) {
- report("Illegal isRenamable setting for opcode with extra regalloc "
- "requirements",
- MO, MONum);
+ if (MO->isRenamable()) {
+ if (MRI->isReserved(Reg)) {
+ report("isRenamable set on reserved register", MO, MONum);
+ return;
+ }
+ }
+ if (MI->isDebugValue() && MO->isUse() && !MO->isDebug()) {
+ report("Use-reg is not IsDebug in a DBG_VALUE", MO, MONum);
return;
}
} else {
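
To make the new extend/truncate rules concrete, here is a small self-contained sketch that restates them directly on LLT values. It is an illustration only, not the verifier code, and the helper isLegalExtend is invented for this example:

#include "llvm/Support/LowLevelTypeImpl.h"
#include <cassert>

using namespace llvm;

// Returns true when an extend from SrcTy to DstTy would pass the checks the
// verifier now applies to G_SEXT/G_ZEXT/G_ANYEXT/G_FPEXT.
static bool isLegalExtend(LLT DstTy, LLT SrcTy) {
  // Pointers may never be the element type of an extend.
  LLT DstElTy = DstTy.isVector() ? DstTy.getElementType() : DstTy;
  LLT SrcElTy = SrcTy.isVector() ? SrcTy.getElementType() : SrcTy;
  if (DstElTy.isPointer() || SrcElTy.isPointer())
    return false;
  // Must be all-vector or all-scalar, and vectors must keep their lane count.
  if (DstTy.isVector() != SrcTy.isVector())
    return false;
  if (DstTy.isVector() && DstTy.getNumElements() != SrcTy.getNumElements())
    return false;
  // For an extend, the destination elements must be strictly wider.
  return DstElTy.getSizeInBits() > SrcElTy.getSizeInBits();
}

int main() {
  assert(isLegalExtend(LLT::scalar(64), LLT::scalar(32)));        // s32 -> s64: ok
  assert(!isLegalExtend(LLT::scalar(32), LLT::scalar(32)));       // same width: rejected
  assert(isLegalExtend(LLT::vector(4, 32), LLT::vector(4, 16)));  // lane count preserved: ok
  assert(!isLegalExtend(LLT::vector(2, 64), LLT::vector(4, 32))); // lane count changes: rejected
  return 0;
}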
diff --git a/contrib/llvm/lib/CodeGen/MacroFusion.cpp b/contrib/llvm/lib/CodeGen/MacroFusion.cpp
index e7f426c469a0..62dadbba0c1a 100644
--- a/contrib/llvm/lib/CodeGen/MacroFusion.cpp
+++ b/contrib/llvm/lib/CodeGen/MacroFusion.cpp
@@ -66,11 +66,11 @@ static bool fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU,
if (SI.getSUnit() == &FirstSU)
SI.setLatency(0);
- DEBUG(dbgs() << "Macro fuse: ";
- FirstSU.print(dbgs(), &DAG); dbgs() << " - ";
- SecondSU.print(dbgs(), &DAG); dbgs() << " / ";
- dbgs() << DAG.TII->getName(FirstSU.getInstr()->getOpcode()) << " - " <<
- DAG.TII->getName(SecondSU.getInstr()->getOpcode()) << '\n'; );
+ LLVM_DEBUG(
+ dbgs() << "Macro fuse: "; FirstSU.print(dbgs(), &DAG); dbgs() << " - ";
+ SecondSU.print(dbgs(), &DAG); dbgs() << " / ";
+ dbgs() << DAG.TII->getName(FirstSU.getInstr()->getOpcode()) << " - "
+ << DAG.TII->getName(SecondSU.getInstr()->getOpcode()) << '\n';);
// Make data dependencies from the FirstSU also dependent on the SecondSU to
// prevent them from being scheduled between the FirstSU and the SecondSU.
@@ -80,24 +80,32 @@ static bool fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU,
if (SI.isWeak() || isHazard(SI) ||
SU == &DAG.ExitSU || SU == &SecondSU || SU->isPred(&SecondSU))
continue;
- DEBUG(dbgs() << " Bind ";
- SecondSU.print(dbgs(), &DAG); dbgs() << " - ";
- SU->print(dbgs(), &DAG); dbgs() << '\n';);
+ LLVM_DEBUG(dbgs() << " Bind "; SecondSU.print(dbgs(), &DAG);
+ dbgs() << " - "; SU->print(dbgs(), &DAG); dbgs() << '\n';);
DAG.addEdge(SU, SDep(&SecondSU, SDep::Artificial));
}
// Make the FirstSU also dependent on the dependencies of the SecondSU to
// prevent them from being scheduled between the FirstSU and the SecondSU.
- if (&FirstSU != &DAG.EntrySU)
+ if (&FirstSU != &DAG.EntrySU) {
for (const SDep &SI : SecondSU.Preds) {
SUnit *SU = SI.getSUnit();
if (SI.isWeak() || isHazard(SI) || &FirstSU == SU || FirstSU.isSucc(SU))
continue;
- DEBUG(dbgs() << " Bind ";
- SU->print(dbgs(), &DAG); dbgs() << " - ";
- FirstSU.print(dbgs(), &DAG); dbgs() << '\n';);
+ LLVM_DEBUG(dbgs() << " Bind "; SU->print(dbgs(), &DAG); dbgs() << " - ";
+ FirstSU.print(dbgs(), &DAG); dbgs() << '\n';);
DAG.addEdge(&FirstSU, SDep(SU, SDep::Artificial));
}
+ // ExitSU comes last by design, which acts like an implicit dependency
+ // between ExitSU and any bottom root in the graph. We should transfer
+ // this to FirstSU as well.
+ if (&SecondSU == &DAG.ExitSU) {
+ for (SUnit &SU : DAG.SUnits) {
+ if (SU.Succs.empty())
+ DAG.addEdge(&FirstSU, SDep(&SU, SDep::Artificial));
+ }
+ }
+ }
++NumFused;
return true;
@@ -105,7 +113,7 @@ static bool fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU,
namespace {
-/// \brief Post-process the DAG to create cluster edges between instrs that may
+/// Post-process the DAG to create cluster edges between instrs that may
/// be fused by the processor into a single operation.
class MacroFusion : public ScheduleDAGMutation {
ShouldSchedulePredTy shouldScheduleAdjacent;
@@ -135,7 +143,7 @@ void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
scheduleAdjacentImpl(*DAG, DAG->ExitSU);
}
-/// \brief Implement the fusion of instr pairs in the scheduling DAG,
+/// Implement the fusion of instr pairs in the scheduling DAG,
/// anchored at the instr in AnchorSU.
bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU) {
const MachineInstr &AnchorMI = *AnchorSU.getInstr();
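
For context on how this mutation is consumed, a hedged sketch follows. It assumes the ShouldSchedulePredTy signature declared in llvm/CodeGen/MacroFusion.h and uses a made-up CMP+conditional-branch fusion rule purely for illustration:

#include "llvm/CodeGen/MacroFusion.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"

using namespace llvm;

// Return true when FirstMI (which may be null, meaning "any predecessor")
// and SecondMI should be fused. The compare/branch pairing here is purely
// illustrative, not any particular target's rule.
static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
                                   const TargetSubtargetInfo &STI,
                                   const MachineInstr *FirstMI,
                                   const MachineInstr &SecondMI) {
  if (!SecondMI.isConditionalBranch())
    return false;
  return !FirstMI || FirstMI->isCompare();
}

// A target's scheduler construction could then register the predicate with:
//   DAG->addMutation(createMacroFusionDAGMutation(shouldScheduleAdjacent));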
diff --git a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
index 8972867ba083..befa8422d399 100644
--- a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
+++ b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
@@ -45,7 +45,7 @@ namespace {
initializeOptimizePHIsPass(*PassRegistry::getPassRegistry());
}
- bool runOnMachineFunction(MachineFunction &MF) override;
+ bool runOnMachineFunction(MachineFunction &Fn) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
index 54c5a940275d..7a5c20000066 100644
--- a/contrib/llvm/lib/CodeGen/PHIElimination.cpp
+++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
@@ -75,7 +75,7 @@ namespace {
initializePHIEliminationPass(*PassRegistry::getPassRegistry());
}
- bool runOnMachineFunction(MachineFunction &Fn) override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
private:
@@ -91,7 +91,7 @@ namespace {
/// register which is used in a PHI node. We map that to the BB the
/// vreg is coming from. This is used later to determine when the vreg
/// is killed in the BB.
- void analyzePHINodes(const MachineFunction& Fn);
+ void analyzePHINodes(const MachineFunction& MF);
/// Split critical edges where necessary for good coalescer performance.
bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB,
@@ -270,7 +270,8 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
IncomingReg = entry;
reusedIncoming = true;
++NumReused;
- DEBUG(dbgs() << "Reusing " << printReg(IncomingReg) << " for " << *MPhi);
+ LLVM_DEBUG(dbgs() << "Reusing " << printReg(IncomingReg) << " for "
+ << *MPhi);
} else {
const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
@@ -295,9 +296,9 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
// AfterPHIsIt, so it appears before the current PHICopy.
if (reusedIncoming)
if (MachineInstr *OldKill = VI.findKill(&MBB)) {
- DEBUG(dbgs() << "Remove old kill from " << *OldKill);
+ LLVM_DEBUG(dbgs() << "Remove old kill from " << *OldKill);
LV->removeVirtualRegisterKilled(IncomingReg, *OldKill);
- DEBUG(MBB.dump());
+ LLVM_DEBUG(MBB.dump());
}
// Add information to LiveVariables to know that the incoming value is
@@ -452,7 +453,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
KillInst = FirstTerm;
while (KillInst != opBlock.begin()) {
--KillInst;
- if (KillInst->isDebugValue())
+ if (KillInst->isDebugInstr())
continue;
if (KillInst->readsRegister(SrcReg))
break;
@@ -512,7 +513,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
KillInst = FirstTerm;
while (KillInst != opBlock.begin()) {
--KillInst;
- if (KillInst->isDebugValue())
+ if (KillInst->isDebugInstr())
continue;
if (KillInst->readsRegister(SrcReg))
break;
@@ -593,9 +594,9 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
if (!ShouldSplit && !NoPhiElimLiveOutEarlyExit)
continue;
if (ShouldSplit) {
- DEBUG(dbgs() << printReg(Reg) << " live-out before critical edge "
- << printMBBReference(*PreMBB) << " -> "
- << printMBBReference(MBB) << ": " << *BBI);
+ LLVM_DEBUG(dbgs() << printReg(Reg) << " live-out before critical edge "
+ << printMBBReference(*PreMBB) << " -> "
+ << printMBBReference(MBB) << ": " << *BBI);
}
// If Reg is not live-in to MBB, it means it must be live-in to some
@@ -610,10 +611,12 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
// Check for a loop exiting edge.
if (!ShouldSplit && CurLoop != PreLoop) {
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "Split wouldn't help, maybe avoid loop copies?\n";
- if (PreLoop) dbgs() << "PreLoop: " << *PreLoop;
- if (CurLoop) dbgs() << "CurLoop: " << *CurLoop;
+ if (PreLoop)
+ dbgs() << "PreLoop: " << *PreLoop;
+ if (CurLoop)
+ dbgs() << "CurLoop: " << *CurLoop;
});
// This edge could be entering a loop, exiting a loop, or it could be
// both: Jumping directly from one loop to the header of a sibling
@@ -624,7 +627,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
if (!ShouldSplit && !SplitAllCriticalEdges)
continue;
if (!PreMBB->SplitCriticalEdge(&MBB, *this)) {
- DEBUG(dbgs() << "Failed to split critical edge.\n");
+ LLVM_DEBUG(dbgs() << "Failed to split critical edge.\n");
continue;
}
Changed = true;
diff --git a/contrib/llvm/lib/CodeGen/ParallelCG.cpp b/contrib/llvm/lib/CodeGen/ParallelCG.cpp
index ff8680a0540d..bc3f2a6e9b5a 100644
--- a/contrib/llvm/lib/CodeGen/ParallelCG.cpp
+++ b/contrib/llvm/lib/CodeGen/ParallelCG.cpp
@@ -30,7 +30,7 @@ static void codegen(Module *M, llvm::raw_pwrite_stream &OS,
TargetMachine::CodeGenFileType FileType) {
std::unique_ptr<TargetMachine> TM = TMFactory();
legacy::PassManager CodeGenPasses;
- if (TM->addPassesToEmitFile(CodeGenPasses, OS, FileType))
+ if (TM->addPassesToEmitFile(CodeGenPasses, OS, nullptr, FileType))
report_fatal_error("Failed to setup codegen");
CodeGenPasses.run(*M);
}
@@ -44,7 +44,7 @@ std::unique_ptr<Module> llvm::splitCodeGen(
if (OSs.size() == 1) {
if (!BCOSs.empty())
- WriteBitcodeToFile(M.get(), *BCOSs[0]);
+ WriteBitcodeToFile(*M, *BCOSs[0]);
codegen(M.get(), *OSs[0], TMFactory, FileType);
return M;
}
@@ -66,7 +66,7 @@ std::unique_ptr<Module> llvm::splitCodeGen(
// FIXME: Provide a more direct way to do this in LLVM.
SmallString<0> BC;
raw_svector_ostream BCOS(BC);
- WriteBitcodeToFile(MPart.get(), BCOS);
+ WriteBitcodeToFile(*MPart, BCOS);
if (!BCOSs.empty()) {
BCOSs[ThreadCount]->write(BC.begin(), BC.size());
diff --git a/contrib/llvm/lib/CodeGen/PatchableFunction.cpp b/contrib/llvm/lib/CodeGen/PatchableFunction.cpp
index 0957705b19bb..afb4b0a7e174 100644
--- a/contrib/llvm/lib/CodeGen/PatchableFunction.cpp
+++ b/contrib/llvm/lib/CodeGen/PatchableFunction.cpp
@@ -49,6 +49,7 @@ static bool doesNotGeneratecode(const MachineInstr &MI) {
case TargetOpcode::EH_LABEL:
case TargetOpcode::GC_LABEL:
case TargetOpcode::DBG_VALUE:
+ case TargetOpcode::DBG_LABEL:
return true;
}
}
diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index 1320f9985553..1d058ccfb633 100644
--- a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -202,7 +202,7 @@ namespace {
bool foldImmediate(MachineInstr &MI, SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
- /// \brief Finds recurrence cycles, but only ones that formulated around
+ /// Finds recurrence cycles, but only ones that are formed around
/// a def operand and a use operand that are tied. If there is a use
/// operand commutable with the tied use operand, find the recurrence cycle
/// along that operand as well.
@@ -210,7 +210,7 @@ namespace {
const SmallSet<unsigned, 2> &TargetReg,
RecurrenceCycle &RC);
- /// \brief If copy instruction \p MI is a virtual register copy, track it in
+ /// If copy instruction \p MI is a virtual register copy, track it in
/// the set \p CopySrcRegs and \p CopyMIs. If this virtual register was
/// previously seen as a copy, replace the uses of this copy with the
/// previously seen copy's destination register.
@@ -221,7 +221,7 @@ namespace {
/// Is the register \p Reg a non-allocatable physical register?
bool isNAPhysCopy(unsigned Reg);
- /// \brief If copy instruction \p MI is a non-allocatable virtual<->physical
+ /// If copy instruction \p MI is a non-allocatable virtual<->physical
/// register copy, track it in the \p NAPhysToVirtMIs map. If this
/// non-allocatable physical register was previously copied to a virtual
/// register and hasn't been clobbered, the virt->phys copy can be
@@ -232,7 +232,7 @@ namespace {
bool isLoadFoldable(MachineInstr &MI,
SmallSet<unsigned, 16> &FoldAsLoadDefCandidates);
- /// \brief Check whether \p MI is understood by the register coalescer
+ /// Check whether \p MI is understood by the register coalescer
/// but may require some rewriting.
bool isCoalescableCopy(const MachineInstr &MI) {
// SubregToRegs are not interesting, because they are already register
@@ -242,7 +242,7 @@ namespace {
MI.isExtractSubreg()));
}
- /// \brief Check whether \p MI is a copy like instruction that is
+ /// Check whether \p MI is a copy like instruction that is
/// not recognized by the register coalescer.
bool isUncoalescableCopy(const MachineInstr &MI) {
return MI.isBitcast() ||
@@ -345,7 +345,7 @@ namespace {
}
};
- /// \brief Helper class to track the possible sources of a value defined by
+ /// Helper class to track the possible sources of a value defined by
/// a (chain of) copy related instructions.
/// Given a definition (instruction and definition index), this class
/// follows the use-def chain to find successive suitable sources.
@@ -425,7 +425,7 @@ namespace {
}
}
- /// \brief Following the use-def chain, get the next available source
+ /// Following the use-def chain, get the next available source
/// for the tracked value.
/// \return A ValueTrackerResult containing a set of registers
/// and sub registers with tracked values. A ValueTrackerResult with
@@ -646,7 +646,7 @@ bool PeepholeOptimizer::optimizeCondBranch(MachineInstr &MI) {
return TII->optimizeCondBranch(MI);
}
-/// \brief Try to find the next source that share the same register file
+/// Try to find the next source that shares the same register file
/// for the value defined by \p Reg and \p SubReg.
/// When true is returned, the \p RewriteMap can be used by the client to
/// retrieve all Def -> Use along the way up to the next source. Any found
@@ -696,7 +696,8 @@ bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg,
// An existent entry with multiple sources is a PHI cycle we must avoid.
// Otherwise it's an entry with a valid next source we already found.
if (CurSrcRes.getNumSources() > 1) {
- DEBUG(dbgs() << "findNextSource: found PHI cycle, aborting...\n");
+ LLVM_DEBUG(dbgs()
+ << "findNextSource: found PHI cycle, aborting...\n");
return false;
}
break;
@@ -709,7 +710,7 @@ bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg,
if (NumSrcs > 1) {
PHICount++;
if (PHICount >= RewritePHILimit) {
- DEBUG(dbgs() << "findNextSource: PHI limit reached\n");
+ LLVM_DEBUG(dbgs() << "findNextSource: PHI limit reached\n");
return false;
}
@@ -746,7 +747,7 @@ bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg,
return CurSrcPair.Reg != Reg;
}
-/// \brief Insert a PHI instruction with incoming edges \p SrcRegs that are
+/// Insert a PHI instruction with incoming edges \p SrcRegs that are
/// guaranteed to have the same register class. This is necessary whenever we
/// successfully traverse a PHI instruction and find suitable sources coming
/// from its edges. By inserting a new PHI, we provide a rewritten PHI def
@@ -791,7 +792,7 @@ public:
Rewriter(MachineInstr &CopyLike) : CopyLike(CopyLike) {}
virtual ~Rewriter() {}
- /// \brief Get the next rewritable source (SrcReg, SrcSubReg) and
+ /// Get the next rewritable source (SrcReg, SrcSubReg) and
/// the related value that it affects (DstReg, DstSubReg).
/// A source is considered rewritable if its register class and the
/// register class of the related DstReg may not be register
@@ -859,7 +860,7 @@ public:
}
};
-/// \brief Helper class to rewrite uncoalescable copy like instructions
+/// Helper class to rewrite uncoalescable copy like instructions
/// into new COPY (coalescable friendly) instructions.
class UncoalescableRewriter : public Rewriter {
unsigned NumDefs; ///< Number of defs in the bitcast.
@@ -1101,7 +1102,7 @@ static Rewriter *getCopyRewriter(MachineInstr &MI, const TargetInstrInfo &TII) {
}
}
-/// \brief Given a \p Def.Reg and Def.SubReg pair, use \p RewriteMap to find
+/// Given a \p Def.Reg and Def.SubReg pair, use \p RewriteMap to find
/// the new source to use for rewrite. If \p HandleMultipleSources is true and
/// multiple sources for a given \p Def are found along the way, we found a
/// PHI instructions that needs to be rewritten.
@@ -1143,9 +1144,9 @@ getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
// Build the new PHI node and return its def register as the new source.
MachineInstr &OrigPHI = const_cast<MachineInstr &>(*Res.getInst());
MachineInstr &NewPHI = insertPHI(*MRI, *TII, NewPHISrcs, OrigPHI);
- DEBUG(dbgs() << "-- getNewSource\n");
- DEBUG(dbgs() << " Replacing: " << OrigPHI);
- DEBUG(dbgs() << " With: " << NewPHI);
+ LLVM_DEBUG(dbgs() << "-- getNewSource\n");
+ LLVM_DEBUG(dbgs() << " Replacing: " << OrigPHI);
+ LLVM_DEBUG(dbgs() << " With: " << NewPHI);
const MachineOperand &MODef = NewPHI.getOperand(0);
return RegSubRegPair(MODef.getReg(), MODef.getSubReg());
}
@@ -1213,7 +1214,7 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) {
return Changed;
}
-/// \brief Rewrite the source found through \p Def, by using the \p RewriteMap
+/// Rewrite the source found through \p Def, by using the \p RewriteMap
/// and create a new COPY instruction. More info about RewriteMap in
/// PeepholeOptimizer::findNextSource. Right now this is only used to handle
/// Uncoalescable copies, since they are copy like instructions that aren't
@@ -1241,9 +1242,9 @@ PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike,
NewCopy->getOperand(0).setIsUndef();
}
- DEBUG(dbgs() << "-- RewriteSource\n");
- DEBUG(dbgs() << " Replacing: " << CopyLike);
- DEBUG(dbgs() << " With: " << *NewCopy);
+ LLVM_DEBUG(dbgs() << "-- RewriteSource\n");
+ LLVM_DEBUG(dbgs() << " Replacing: " << CopyLike);
+ LLVM_DEBUG(dbgs() << " With: " << *NewCopy);
MRI->replaceRegWith(Def.Reg, NewVReg);
MRI->clearKillFlags(NewVReg);
@@ -1254,7 +1255,7 @@ PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike,
return *NewCopy;
}
-/// \brief Optimize copy-like instructions to create
+/// Optimize copy-like instructions to create
/// register coalescer friendly instructions.
/// The optimization tries to kill-off the \p MI by looking
/// through a chain of copies to find a source that has a compatible
@@ -1462,7 +1463,8 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy(
if (PrevCopy == NAPhysToVirtMIs.end()) {
// We can't remove the copy: there was an intervening clobber of the
// non-allocatable physical register after the copy to virtual.
- DEBUG(dbgs() << "NAPhysCopy: intervening clobber forbids erasing " << MI);
+ LLVM_DEBUG(dbgs() << "NAPhysCopy: intervening clobber forbids erasing "
+ << MI);
return false;
}
@@ -1470,7 +1472,7 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy(
if (PrevDstReg == SrcReg) {
// Remove the virt->phys copy: we saw the virtual register definition, and
// the non-allocatable physical register's state hasn't changed since then.
- DEBUG(dbgs() << "NAPhysCopy: erasing " << MI);
+ LLVM_DEBUG(dbgs() << "NAPhysCopy: erasing " << MI);
++NumNAPhysCopies;
return true;
}
@@ -1479,7 +1481,7 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy(
// register get a copy of the non-allocatable physical register, and we only
// track one such copy. Avoid getting confused by this new non-allocatable
// physical register definition, and remove it from the tracked copies.
- DEBUG(dbgs() << "NAPhysCopy: missed opportunity " << MI);
+ LLVM_DEBUG(dbgs() << "NAPhysCopy: missed opportunity " << MI);
NAPhysToVirtMIs.erase(PrevCopy);
return false;
}
@@ -1575,15 +1577,15 @@ bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) {
if (findTargetRecurrence(PHI.getOperand(0).getReg(), TargetRegs, RC)) {
// Commutes operands of instructions in RC if necessary so that the copy to
// be generated from PHI can be coalesced.
- DEBUG(dbgs() << "Optimize recurrence chain from " << PHI);
+ LLVM_DEBUG(dbgs() << "Optimize recurrence chain from " << PHI);
for (auto &RI : RC) {
- DEBUG(dbgs() << "\tInst: " << *(RI.getMI()));
+ LLVM_DEBUG(dbgs() << "\tInst: " << *(RI.getMI()));
auto CP = RI.getCommutePair();
if (CP) {
Changed = true;
TII->commuteInstruction(*(RI.getMI()), false, (*CP).first,
(*CP).second);
- DEBUG(dbgs() << "\t\tCommuted: " << *(RI.getMI()));
+ LLVM_DEBUG(dbgs() << "\t\tCommuted: " << *(RI.getMI()));
}
}
}
@@ -1595,8 +1597,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
- DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
- DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n');
+ LLVM_DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
+ LLVM_DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n');
if (DisablePeephole)
return false;
@@ -1643,8 +1645,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
++MII;
LocalMIs.insert(MI);
- // Skip debug values. They should not affect this peephole optimization.
- if (MI->isDebugValue())
+ // Skip debug instructions. They should not affect this peephole optimization.
+ if (MI->isDebugInstr())
continue;
if (MI->isPosition())
@@ -1667,7 +1669,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (Def != NAPhysToVirtMIs.end()) {
// A new definition of the non-allocatable physical register
// invalidates previous copies.
- DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI);
+ LLVM_DEBUG(dbgs()
+ << "NAPhysCopy: invalidating because of " << *MI);
NAPhysToVirtMIs.erase(Def);
}
}
@@ -1676,7 +1679,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
for (auto &RegMI : NAPhysToVirtMIs) {
unsigned Def = RegMI.first;
if (MachineOperand::clobbersPhysReg(RegMask, Def)) {
- DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI);
+ LLVM_DEBUG(dbgs()
+ << "NAPhysCopy: invalidating because of " << *MI);
NAPhysToVirtMIs.erase(Def);
}
}
@@ -1692,7 +1696,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
// don't know what's correct anymore.
//
// FIXME: handle explicit asm clobbers.
- DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to " << *MI);
+ LLVM_DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to "
+ << *MI);
NAPhysToVirtMIs.clear();
}
@@ -1768,8 +1773,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
TII->optimizeLoadInstr(*MI, MRI, FoldAsLoadDefReg, DefMI)) {
// Update LocalMIs since we replaced MI with FoldMI and deleted
// DefMI.
- DEBUG(dbgs() << "Replacing: " << *MI);
- DEBUG(dbgs() << " With: " << *FoldMI);
+ LLVM_DEBUG(dbgs() << "Replacing: " << *MI);
+ LLVM_DEBUG(dbgs() << " With: " << *FoldMI);
LocalMIs.erase(MI);
LocalMIs.erase(DefMI);
LocalMIs.insert(FoldMI);
@@ -1791,7 +1796,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
// the load candidates. Note: We might be able to fold *into* this
// instruction, so this needs to be after the folding logic.
if (MI->isLoadFoldBarrier()) {
- DEBUG(dbgs() << "Encountered load fold barrier on " << *MI);
+ LLVM_DEBUG(dbgs() << "Encountered load fold barrier on " << *MI);
FoldAsLoadDefCandidates.clear();
}
}
diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
index 5d86faafdd85..215da630caf4 100644
--- a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
+++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -38,6 +38,7 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -242,11 +243,11 @@ void SchedulePostRATDList::enterRegion(MachineBasicBlock *bb,
/// Print the schedule before exiting the region.
void SchedulePostRATDList::exitRegion() {
- DEBUG({
- dbgs() << "*** Final schedule ***\n";
- dumpSchedule();
- dbgs() << '\n';
- });
+ LLVM_DEBUG({
+ dbgs() << "*** Final schedule ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
ScheduleDAGInstrs::exitRegion();
}
@@ -308,7 +309,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
: TargetSubtargetInfo::ANTIDEP_NONE);
}
- DEBUG(dbgs() << "PostRAScheduler\n");
+ LLVM_DEBUG(dbgs() << "PostRAScheduler\n");
SchedulePostRATDList Scheduler(Fn, MLI, AA, RegClassInfo, AntiDepMode,
CriticalPathRCs);
@@ -412,13 +413,12 @@ void SchedulePostRATDList::schedule() {
postprocessDAG();
- DEBUG(dbgs() << "********** List Scheduling **********\n");
- DEBUG(
- for (const SUnit &SU : SUnits) {
- SU.dumpAll(this);
- dbgs() << '\n';
- }
- );
+ LLVM_DEBUG(dbgs() << "********** List Scheduling **********\n");
+ LLVM_DEBUG(for (const SUnit &SU
+ : SUnits) {
+ SU.dumpAll(this);
+ dbgs() << '\n';
+ });
AvailableQueue.initNodes(SUnits);
ListScheduleTopDown();
@@ -501,8 +501,8 @@ void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
- DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
- DEBUG(SU->dump(this));
+ LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ LLVM_DEBUG(SU->dump(this));
Sequence.push_back(SU);
assert(CurCycle >= SU->getDepth() &&
@@ -516,7 +516,7 @@ void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
/// emitNoop - Add a noop to the current instruction sequence.
void SchedulePostRATDList::emitNoop(unsigned CurCycle) {
- DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n');
+ LLVM_DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n');
HazardRec->EmitNoop();
Sequence.push_back(nullptr); // NULL here means noop
++NumNoops;
@@ -568,7 +568,8 @@ void SchedulePostRATDList::ListScheduleTopDown() {
MinDepth = PendingQueue[i]->getDepth();
}
- DEBUG(dbgs() << "\n*** Examining Available\n"; AvailableQueue.dump(this));
+ LLVM_DEBUG(dbgs() << "\n*** Examining Available\n";
+ AvailableQueue.dump(this));
SUnit *FoundSUnit = nullptr, *NotPreferredSUnit = nullptr;
bool HasNoopHazards = false;
@@ -604,7 +605,8 @@ void SchedulePostRATDList::ListScheduleTopDown() {
// non-preferred node.
if (NotPreferredSUnit) {
if (!FoundSUnit) {
- DEBUG(dbgs() << "*** Will schedule a non-preferred instruction...\n");
+ LLVM_DEBUG(
+ dbgs() << "*** Will schedule a non-preferred instruction...\n");
FoundSUnit = NotPreferredSUnit;
} else {
AvailableQueue.push(NotPreferredSUnit);
@@ -631,19 +633,20 @@ void SchedulePostRATDList::ListScheduleTopDown() {
HazardRec->EmitInstruction(FoundSUnit);
CycleHasInsts = true;
if (HazardRec->atIssueLimit()) {
- DEBUG(dbgs() << "*** Max instructions per cycle " << CurCycle << '\n');
+ LLVM_DEBUG(dbgs() << "*** Max instructions per cycle " << CurCycle
+ << '\n');
HazardRec->AdvanceCycle();
++CurCycle;
CycleHasInsts = false;
}
} else {
if (CycleHasInsts) {
- DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n');
+ LLVM_DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n');
HazardRec->AdvanceCycle();
} else if (!HasNoopHazards) {
// Otherwise, we have a pipeline stall, but no other problem,
// just advance the current cycle and try again.
- DEBUG(dbgs() << "*** Stall in cycle " << CurCycle << '\n');
+ LLVM_DEBUG(dbgs() << "*** Stall in cycle " << CurCycle << '\n');
HazardRec->AdvanceCycle();
++NumStalls;
} else {
diff --git a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index 48b48c5f6499..7e9b4af12ee9 100644
--- a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -44,7 +44,7 @@ public:
void getAnalysisUsage(AnalysisUsage &au) const override;
- bool runOnMachineFunction(MachineFunction &fn) override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
};
} // end anonymous namespace
@@ -73,7 +73,7 @@ bool ProcessImplicitDefs::canTurnIntoImplicitDef(MachineInstr *MI) {
}
void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
- DEBUG(dbgs() << "Processing " << *MI);
+ LLVM_DEBUG(dbgs() << "Processing " << *MI);
unsigned Reg = MI->getOperand(0).getReg();
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
@@ -84,7 +84,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
MachineInstr *UserMI = MO.getParent();
if (!canTurnIntoImplicitDef(UserMI))
continue;
- DEBUG(dbgs() << "Converting to IMPLICIT_DEF: " << *UserMI);
+ LLVM_DEBUG(dbgs() << "Converting to IMPLICIT_DEF: " << *UserMI);
UserMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
WorkList.insert(UserMI);
}
@@ -116,7 +116,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
// If we found the using MI, we can erase the IMPLICIT_DEF.
if (Found) {
- DEBUG(dbgs() << "Physreg user: " << *UserMI);
+ LLVM_DEBUG(dbgs() << "Physreg user: " << *UserMI);
MI->eraseFromParent();
return;
}
@@ -125,15 +125,15 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
// Leave the physreg IMPLICIT_DEF, but trim any extra operands.
for (unsigned i = MI->getNumOperands() - 1; i; --i)
MI->RemoveOperand(i);
- DEBUG(dbgs() << "Keeping physreg: " << *MI);
+ LLVM_DEBUG(dbgs() << "Keeping physreg: " << *MI);
}
/// processImplicitDefs - Process IMPLICIT_DEF instructions and turn them into
/// <undef> operands.
bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) {
- DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n"
- << "********** Function: " << MF.getName() << '\n');
+ LLVM_DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n"
+ << "********** Function: " << MF.getName() << '\n');
bool Changed = false;
@@ -154,8 +154,8 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) {
if (WorkList.empty())
continue;
- DEBUG(dbgs() << printMBBReference(*MFI) << " has " << WorkList.size()
- << " implicit defs.\n");
+ LLVM_DEBUG(dbgs() << printMBBReference(*MFI) << " has " << WorkList.size()
+ << " implicit defs.\n");
Changed = true;
// Drain the WorkList to recursively process any new implicit defs.
diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index a8d8ad8ac7dc..fc62c8caf59e 100644
--- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -38,7 +38,6 @@
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/CodeGen/StackProtector.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
@@ -90,7 +89,7 @@ public:
/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
/// frame indexes with appropriate references.
- bool runOnMachineFunction(MachineFunction &Fn) override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
private:
RegScavenger *RS;
@@ -117,15 +116,15 @@ private:
// Emit remarks.
MachineOptimizationRemarkEmitter *ORE = nullptr;
- void calculateCallFrameInfo(MachineFunction &Fn);
- void calculateSaveRestoreBlocks(MachineFunction &Fn);
+ void calculateCallFrameInfo(MachineFunction &MF);
+ void calculateSaveRestoreBlocks(MachineFunction &MF);
void spillCalleeSavedRegs(MachineFunction &MF);
- void calculateFrameObjectOffsets(MachineFunction &Fn);
- void replaceFrameIndices(MachineFunction &Fn);
- void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
+ void calculateFrameObjectOffsets(MachineFunction &MF);
+ void replaceFrameIndices(MachineFunction &MF);
+ void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
int &SPAdj);
- void insertPrologEpilogCode(MachineFunction &Fn);
+ void insertPrologEpilogCode(MachineFunction &MF);
};
} // end anonymous namespace
@@ -143,7 +142,6 @@ INITIALIZE_PASS_BEGIN(PEI, DEBUG_TYPE, "Prologue/Epilogue Insertion", false,
false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(StackProtector)
INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
INITIALIZE_PASS_END(PEI, DEBUG_TYPE,
"Prologue/Epilogue Insertion & Frame Finalization", false,
@@ -160,7 +158,6 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addPreserved<MachineLoopInfo>();
AU.addPreserved<MachineDominatorTree>();
- AU.addRequired<StackProtector>();
AU.addRequired<MachineOptimizationRemarkEmitterPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -170,36 +167,36 @@ using StackObjSet = SmallSetVector<int, 8>;
/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
/// frame indexes with appropriate references.
-bool PEI::runOnMachineFunction(MachineFunction &Fn) {
- const Function &F = Fn.getFunction();
- const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
- const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
+bool PEI::runOnMachineFunction(MachineFunction &MF) {
+ const Function &F = MF.getFunction();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
- RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : nullptr;
- FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
+ RS = TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr;
+ FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(MF);
FrameIndexEliminationScavenging = (RS && !FrameIndexVirtualScavenging) ||
- TRI->requiresFrameIndexReplacementScavenging(Fn);
+ TRI->requiresFrameIndexReplacementScavenging(MF);
ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
// Calculate the MaxCallFrameSize and AdjustsStack variables for the
// function's frame information. Also eliminates call frame pseudo
// instructions.
- calculateCallFrameInfo(Fn);
+ calculateCallFrameInfo(MF);
// Determine placement of CSR spill/restore code and prolog/epilog code:
// place all spills in the entry block, all restores in return blocks.
- calculateSaveRestoreBlocks(Fn);
+ calculateSaveRestoreBlocks(MF);
// Handle CSR spilling and restoring, for targets that need it.
- if (Fn.getTarget().usesPhysRegsForPEI())
- spillCalleeSavedRegs(Fn);
+ if (MF.getTarget().usesPhysRegsForPEI())
+ spillCalleeSavedRegs(MF);
// Allow the target machine to make final modifications to the function
// before the frame layout is finalized.
- TFI->processFunctionBeforeFrameFinalized(Fn, RS);
+ TFI->processFunctionBeforeFrameFinalized(MF, RS);
// Calculate actual frame offsets for all abstract stack objects...
- calculateFrameObjectOffsets(Fn);
+ calculateFrameObjectOffsets(MF);
// Add prolog and epilog code to the function. This function is required
// to align the stack frame as necessary for any stack variables or
@@ -207,26 +204,32 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
// must be called before this function in order to set the AdjustsStack
// and MaxCallFrameSize variables.
if (!F.hasFnAttribute(Attribute::Naked))
- insertPrologEpilogCode(Fn);
+ insertPrologEpilogCode(MF);
// Replace all MO_FrameIndex operands with physical register references
// and actual offsets.
//
- replaceFrameIndices(Fn);
+ replaceFrameIndices(MF);
// If register scavenging is needed, as we've enabled doing it as a
// post-pass, scavenge the virtual registers that frame index elimination
// inserted.
- if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging)
- scavengeFrameVirtualRegs(Fn, *RS);
+ if (TRI->requiresRegisterScavenging(MF) && FrameIndexVirtualScavenging)
+ scavengeFrameVirtualRegs(MF, *RS);
  // Warn on stack size when it exceeds the given limit.
- MachineFrameInfo &MFI = Fn.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
uint64_t StackSize = MFI.getStackSize();
if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) {
DiagnosticInfoStackSize DiagStackSize(F, StackSize);
F.getContext().diagnose(DiagStackSize);
}
+ ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "StackSize",
+ MF.getFunction().getSubprogram(),
+ &MF.front())
+ << ore::NV("NumStackBytes", StackSize) << " stack bytes in function";
+ });
delete RS;
SaveBlocks.clear();
@@ -239,10 +242,10 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
/// Calculate the MaxCallFrameSize and AdjustsStack
/// variables for the function's frame information and eliminate call frame
/// pseudo instructions.
-void PEI::calculateCallFrameInfo(MachineFunction &Fn) {
- const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
- const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
- MachineFrameInfo &MFI = Fn.getFrameInfo();
+void PEI::calculateCallFrameInfo(MachineFunction &MF) {
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned MaxCallFrameSize = 0;
bool AdjustsStack = MFI.adjustsStack();
@@ -257,7 +260,7 @@ void PEI::calculateCallFrameInfo(MachineFunction &Fn) {
return;
std::vector<MachineBasicBlock::iterator> FrameSDOps;
- for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB)
+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
if (TII.isFrameInstr(*I)) {
unsigned Size = TII.getFrameSize(*I);
@@ -285,15 +288,15 @@ void PEI::calculateCallFrameInfo(MachineFunction &Fn) {
// the target doesn't indicate otherwise, remove the call frame pseudos
// here. The sub/add sp instruction pairs are still inserted, but we don't
// need to track the SP adjustment for frame index elimination.
- if (TFI->canSimplifyCallFramePseudos(Fn))
- TFI->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I);
+ if (TFI->canSimplifyCallFramePseudos(MF))
+ TFI->eliminateCallFramePseudoInstr(MF, *I->getParent(), I);
}
}
/// Compute the sets of entry and return blocks for saving and restoring
/// callee-saved registers, and placing prolog and epilog code.
-void PEI::calculateSaveRestoreBlocks(MachineFunction &Fn) {
- const MachineFrameInfo &MFI = Fn.getFrameInfo();
+void PEI::calculateSaveRestoreBlocks(MachineFunction &MF) {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
// Even when we do not change any CSR, we still want to insert the
// prologue and epilogue of the function.
@@ -313,8 +316,8 @@ void PEI::calculateSaveRestoreBlocks(MachineFunction &Fn) {
}
// Save refs to entry and return blocks.
- SaveBlocks.push_back(&Fn.front());
- for (MachineBasicBlock &MBB : Fn) {
+ SaveBlocks.push_back(&MF.front());
+ for (MachineBasicBlock &MBB : MF) {
if (MBB.isEHFuncletEntry())
SaveBlocks.push_back(&MBB);
if (MBB.isReturnBlock())
@@ -457,10 +460,10 @@ static void updateLiveness(MachineFunction &MF) {
/// Insert spill code for the callee-saved registers used in the function.
static void insertCSRSaves(MachineBasicBlock &SaveBlock,
ArrayRef<CalleeSavedInfo> CSI) {
- MachineFunction &Fn = *SaveBlock.getParent();
- const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
- const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
- const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
+ MachineFunction &MF = *SaveBlock.getParent();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
MachineBasicBlock::iterator I = SaveBlock.begin();
if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
@@ -477,10 +480,10 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock,
/// Insert restore code for the callee-saved registers used in the function.
static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
std::vector<CalleeSavedInfo> &CSI) {
- MachineFunction &Fn = *RestoreBlock.getParent();
- const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
- const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
- const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
+ MachineFunction &MF = *RestoreBlock.getParent();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
// Restore all registers immediately before the return and any
// terminators that precede it.
@@ -499,27 +502,27 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
}
}
-void PEI::spillCalleeSavedRegs(MachineFunction &Fn) {
+void PEI::spillCalleeSavedRegs(MachineFunction &MF) {
// We can't list this requirement in getRequiredProperties because some
// targets (WebAssembly) use virtual registers past this point, and the pass
// pipeline is set up without giving the passes a chance to look at the
// TargetMachine.
// FIXME: Find a way to express this in getRequiredProperties.
- assert(Fn.getProperties().hasProperty(
+ assert(MF.getProperties().hasProperty(
MachineFunctionProperties::Property::NoVRegs));
- const Function &F = Fn.getFunction();
- const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
- MachineFrameInfo &MFI = Fn.getFrameInfo();
+ const Function &F = MF.getFunction();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
MinCSFrameIndex = std::numeric_limits<unsigned>::max();
MaxCSFrameIndex = 0;
// Determine which of the registers in the callee save list should be saved.
BitVector SavedRegs;
- TFI->determineCalleeSaves(Fn, SavedRegs, RS);
+ TFI->determineCalleeSaves(MF, SavedRegs, RS);
// Assign stack slots for any callee-saved registers that must be spilled.
- assignCalleeSavedSpillSlots(Fn, SavedRegs, MinCSFrameIndex, MaxCSFrameIndex);
+ assignCalleeSavedSpillSlots(MF, SavedRegs, MinCSFrameIndex, MaxCSFrameIndex);
// Add the code to save and restore the callee saved registers.
if (!F.hasFnAttribute(Attribute::Naked)) {
@@ -531,7 +534,7 @@ void PEI::spillCalleeSavedRegs(MachineFunction &Fn) {
insertCSRSaves(*SaveBlock, CSI);
// Update the live-in information of all the blocks up to the save
// point.
- updateLiveness(Fn);
+ updateLiveness(MF);
}
for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
insertCSRRestores(*RestoreBlock, CSI);
@@ -558,10 +561,12 @@ AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx,
Offset = alignTo(Offset, Align, Skew);
if (StackGrowsDown) {
- DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n");
+ LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset
+ << "]\n");
MFI.setObjectOffset(FrameIdx, -Offset); // Set the computed offset
} else {
- DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset << "]\n");
+ LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset
+ << "]\n");
MFI.setObjectOffset(FrameIdx, Offset);
Offset += MFI.getObjectSize(FrameIdx);
}
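AdjustStackOffset rounds each object's offset with alignTo(Offset, Align, Skew) before recording it in MachineFrameInfo. As a minimal sketch of the skewed-alignment arithmetic assumed here (not the LLVM helper itself): the result is the smallest value greater than or equal to Offset that is congruent to Skew modulo Align.

#include <cassert>
#include <cstdint>

// Sketch only: assumed semantics of llvm::alignTo(Value, Align, Skew).
static uint64_t alignToSkewed(uint64_t Offset, uint64_t Align, uint64_t Skew) {
  assert(Align != 0 && "alignment must be non-zero");
  Skew %= Align;
  return ((Offset + Align - 1 - Skew) / Align) * Align + Skew;
}

// Worked examples: alignToSkewed(13, 8, 0) == 16 and alignToSkewed(13, 8, 4) == 20;
// with a 4-byte skew the admissible offsets are 4, 12, 20, ...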
@@ -654,12 +659,12 @@ static inline bool scavengeStackSlot(MachineFrameInfo &MFI, int FrameIdx,
if (StackGrowsDown) {
int ObjStart = -(FreeStart + ObjSize);
- DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" << ObjStart
- << "]\n");
+ LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP["
+ << ObjStart << "]\n");
MFI.setObjectOffset(FrameIdx, ObjStart);
} else {
- DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" << FreeStart
- << "]\n");
+ LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP["
+ << FreeStart << "]\n");
MFI.setObjectOffset(FrameIdx, FreeStart);
}
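These hunks belong to the mechanical DEBUG -> LLVM_DEBUG rename that runs through the rest of this diff. A rough sketch of the assumed macro shape (per llvm/Support/Debug.h, not the verbatim definition): the body runs only in assertion-enabled builds, and only when -debug or -debug-only=<DEBUG_TYPE> is in effect.

// Rough sketch of the assumed macro, for orientation only.
#ifndef NDEBUG
#define LLVM_DEBUG(X)                                                          \
  do {                                                                         \
    if (::llvm::DebugFlag && ::llvm::isCurrentDebugType(DEBUG_TYPE)) {         \
      X;                                                                       \
    }                                                                          \
  } while (false)
#else
#define LLVM_DEBUG(X)                                                          \
  do {                                                                         \
  } while (false)
#endif

// Typical use, as in the hunks above:
//   LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset << "]\n");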
@@ -685,15 +690,14 @@ AssignProtectedObjSet(const StackObjSet &UnassignedObjs,
/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
/// abstract stack objects.
-void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
- const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
- StackProtector *SP = &getAnalysis<StackProtector>();
+void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
+ const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
bool StackGrowsDown =
TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
// Loop over all of the stack objects, assigning sequential addresses...
- MachineFrameInfo &MFI = Fn.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
// Start at the beginning of the local area.
// The Offset is the distance from the stack top in the direction
@@ -706,7 +710,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
int64_t Offset = LocalAreaOffset;
// Skew to be applied to alignment.
- unsigned Skew = TFI.getStackAlignmentSkew(Fn);
+ unsigned Skew = TFI.getStackAlignmentSkew(MF);
// If there are fixed sized objects that are preallocated in the local area,
// non-fixed objects can't be allocated right at the start of local area.
@@ -739,7 +743,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Adjust to alignment boundary
Offset = alignTo(Offset, Align, Skew);
- DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << -Offset << "]\n");
+ LLVM_DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << -Offset << "]\n");
MFI.setObjectOffset(i, -Offset); // Set the computed offset
}
} else if (MaxCSFrameIndex >= MinCSFrameIndex) {
@@ -752,7 +756,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Adjust to alignment boundary
Offset = alignTo(Offset, Align, Skew);
- DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << Offset << "]\n");
+ LLVM_DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << Offset << "]\n");
MFI.setObjectOffset(i, Offset);
Offset += MFI.getObjectSize(i);
}
@@ -766,11 +770,11 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Make sure the special register scavenging spill slot is closest to the
// incoming stack pointer if a frame pointer is required and is closer
// to the incoming rather than the final stack pointer.
- const TargetRegisterInfo *RegInfo = Fn.getSubtarget().getRegisterInfo();
- bool EarlyScavengingSlots = (TFI.hasFP(Fn) &&
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+ bool EarlyScavengingSlots = (TFI.hasFP(MF) &&
TFI.isFPCloseToIncomingSP() &&
- RegInfo->useFPForScavengingIndex(Fn) &&
- !RegInfo->needsStackRealignment(Fn));
+ RegInfo->useFPForScavengingIndex(MF) &&
+ !RegInfo->needsStackRealignment(MF));
if (RS && EarlyScavengingSlots) {
SmallVector<int, 2> SFIs;
RS->getScavengingFrameIndices(SFIs);
@@ -789,14 +793,14 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Adjust to alignment boundary.
Offset = alignTo(Offset, Align, Skew);
- DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");
+ LLVM_DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");
// Resolve offsets for objects in the local block.
for (unsigned i = 0, e = MFI.getLocalFrameObjectCount(); i != e; ++i) {
std::pair<int, int64_t> Entry = MFI.getLocalFrameObjectMap(i);
int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second;
- DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" <<
- FIOffset << "]\n");
+ LLVM_DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" << FIOffset
+ << "]\n");
MFI.setObjectOffset(Entry.first, FIOffset);
}
// Allocate the local block
@@ -807,7 +811,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Retrieve the Exception Handler registration node.
int EHRegNodeFrameIndex = std::numeric_limits<int>::max();
- if (const WinEHFuncInfo *FuncInfo = Fn.getWinEHFuncInfo())
+ if (const WinEHFuncInfo *FuncInfo = MF.getWinEHFuncInfo())
EHRegNodeFrameIndex = FuncInfo->EHRegNodeFrameIndex;
// Make sure that the stack protector comes before the local variables on the
@@ -836,16 +840,16 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
EHRegNodeFrameIndex == (int)i)
continue;
- switch (SP->getSSPLayout(MFI.getObjectAllocation(i))) {
- case StackProtector::SSPLK_None:
+ switch (MFI.getObjectSSPLayout(i)) {
+ case MachineFrameInfo::SSPLK_None:
continue;
- case StackProtector::SSPLK_SmallArray:
+ case MachineFrameInfo::SSPLK_SmallArray:
SmallArrayObjs.insert(i);
continue;
- case StackProtector::SSPLK_AddrOf:
+ case MachineFrameInfo::SSPLK_AddrOf:
AddrOfObjs.insert(i);
continue;
- case StackProtector::SSPLK_LargeArray:
+ case MachineFrameInfo::SSPLK_LargeArray:
LargeArrayObjs.insert(i);
continue;
}
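With this change the stack-protector layout kind is read per frame index from MachineFrameInfo rather than from the separate StackProtector analysis, which is why the pass dependency and the getAnalysis<StackProtector>() call were dropped earlier in this diff. A minimal sketch of the assumed intent behind the three protected buckets (hypothetical names, none of the real bookkeeping): objects most likely to be overflowed are placed closest to the guard slot.

// Hypothetical illustration of the assumed ordering; the real pass hands the
// three sets to AssignProtectedObjSet, and this ranking is an assumption
// about the intended layout, not a quote of the implementation.
enum class SSPLayoutKind { None, LargeArray, SmallArray, AddrOf };

// Lower value = allocated nearer the stack protector, so an overflow is
// detected before it can reach saved registers or the return address.
static int guardProximity(SSPLayoutKind K) {
  switch (K) {
  case SSPLayoutKind::LargeArray: return 0; // large char buffers first
  case SSPLayoutKind::SmallArray: return 1;
  case SSPLayoutKind::AddrOf:     return 2; // address-taken locals
  case SSPLayoutKind::None:       return 3; // everything else
  }
  return 3;
}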
@@ -889,9 +893,9 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
MaxAlign, Skew);
// Give the targets a chance to order the objects the way they like it.
- if (Fn.getTarget().getOptLevel() != CodeGenOpt::None &&
- Fn.getTarget().Options.StackSymbolOrdering)
- TFI.orderFrameObjects(Fn, ObjectsToAllocate);
+ if (MF.getTarget().getOptLevel() != CodeGenOpt::None &&
+ MF.getTarget().Options.StackSymbolOrdering)
+ TFI.orderFrameObjects(MF, ObjectsToAllocate);
// Keep track of which bytes in the fixed and callee-save range are used so we
// can use the holes when allocating later stack objects. Only do this if
@@ -899,8 +903,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// optimizing.
BitVector StackBytesFree;
if (!ObjectsToAllocate.empty() &&
- Fn.getTarget().getOptLevel() != CodeGenOpt::None &&
- MFI.getStackProtectorIndex() < 0 && TFI.enableStackSlotScavenging(Fn))
+ MF.getTarget().getOptLevel() != CodeGenOpt::None &&
+ MFI.getStackProtectorIndex() < 0 && TFI.enableStackSlotScavenging(MF))
computeFreeStackSlots(MFI, StackGrowsDown, MinCSFrameIndex, MaxCSFrameIndex,
FixedCSEnd, StackBytesFree);
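When optimizing, and when no stack protector slot is in play, the pass records which bytes of the fixed and callee-save range are still unused (computeFreeStackSlots) and later tries to drop small objects into those holes (scavengeStackSlot, in an earlier hunk). A minimal sketch of that idea, with hypothetical names and none of the MachineFrameInfo bookkeeping:

#include <cassert>
#include <vector>

// Sketch only: find an aligned run of free bytes large enough for an object.
// Free[i] is true when stack byte i inside the fixed/CSR range is unused.
static int findStackHole(const std::vector<bool> &Free, unsigned ObjSize,
                         unsigned Align) {
  assert(Align != 0 && "alignment must be non-zero");
  for (unsigned Start = 0; Start + ObjSize <= Free.size(); Start += Align) {
    bool AllFree = true;
    for (unsigned I = Start; I != Start + ObjSize; ++I)
      if (!Free[I]) {
        AllFree = false;
        break;
      }
    if (AllFree)
      return static_cast<int>(Start); // reuse this hole instead of growing the frame
  }
  return -1; // no hole found; the object gets fresh stack space
}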
@@ -924,7 +928,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// If we have reserved argument space for call sites in the function
// immediately on entry to the current function, count it as part of the
// overall stack size.
- if (MFI.adjustsStack() && TFI.hasReservedCallFrame(Fn))
+ if (MFI.adjustsStack() && TFI.hasReservedCallFrame(MF))
Offset += MFI.getMaxCallFrameSize();
// Round up the size to a multiple of the alignment. If the function has
@@ -934,7 +938,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// value.
unsigned StackAlign;
if (MFI.adjustsStack() || MFI.hasVarSizedObjects() ||
- (RegInfo->needsStackRealignment(Fn) && MFI.getObjectIndexEnd() != 0))
+ (RegInfo->needsStackRealignment(MF) && MFI.getObjectIndexEnd() != 0))
StackAlign = TFI.getStackAlignment();
else
StackAlign = TFI.getTransientStackAlignment();
@@ -949,68 +953,61 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
int64_t StackSize = Offset - LocalAreaOffset;
MFI.setStackSize(StackSize);
NumBytesStackSpace += StackSize;
-
- ORE->emit([&]() {
- return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "StackSize",
- Fn.getFunction().getSubprogram(),
- &Fn.front())
- << ore::NV("NumStackBytes", StackSize) << " stack bytes in function";
- });
}
/// insertPrologEpilogCode - Scan the function for modified callee saved
/// registers, insert spill code for these callee saved registers, then add
/// prolog and epilog code to the function.
-void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
- const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
+void PEI::insertPrologEpilogCode(MachineFunction &MF) {
+ const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
// Add prologue to the function...
for (MachineBasicBlock *SaveBlock : SaveBlocks)
- TFI.emitPrologue(Fn, *SaveBlock);
+ TFI.emitPrologue(MF, *SaveBlock);
// Add epilogue to restore the callee-save registers in each exiting block.
for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
- TFI.emitEpilogue(Fn, *RestoreBlock);
+ TFI.emitEpilogue(MF, *RestoreBlock);
for (MachineBasicBlock *SaveBlock : SaveBlocks)
- TFI.inlineStackProbe(Fn, *SaveBlock);
+ TFI.inlineStackProbe(MF, *SaveBlock);
// Emit additional code that is required to support segmented stacks, if
// we've been asked for it. This, when linked with a runtime with support
// for segmented stacks (libgcc is one), will result in allocating stack
// space in small chunks instead of one large contiguous block.
- if (Fn.shouldSplitStack()) {
+ if (MF.shouldSplitStack()) {
for (MachineBasicBlock *SaveBlock : SaveBlocks)
- TFI.adjustForSegmentedStacks(Fn, *SaveBlock);
+ TFI.adjustForSegmentedStacks(MF, *SaveBlock);
// Record that there are split-stack functions, so we will emit a
// special section to tell the linker.
- Fn.getMMI().setHasSplitStack(true);
+ MF.getMMI().setHasSplitStack(true);
} else
- Fn.getMMI().setHasNosplitStack(true);
+ MF.getMMI().setHasNosplitStack(true);
// Emit additional code that is required to explicitly handle the stack in
// HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The
// approach is rather similar to that of Segmented Stacks, but it uses a
// different conditional check and another BIF for allocating more stack
// space.
- if (Fn.getFunction().getCallingConv() == CallingConv::HiPE)
+ if (MF.getFunction().getCallingConv() == CallingConv::HiPE)
for (MachineBasicBlock *SaveBlock : SaveBlocks)
- TFI.adjustForHiPEPrologue(Fn, *SaveBlock);
+ TFI.adjustForHiPEPrologue(MF, *SaveBlock);
}
/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
/// register references and actual offsets.
-void PEI::replaceFrameIndices(MachineFunction &Fn) {
- const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
- if (!TFI.needsFrameIndexResolution(Fn)) return;
+void PEI::replaceFrameIndices(MachineFunction &MF) {
+ const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
+ if (!TFI.needsFrameIndexResolution(MF)) return;
// Store SPAdj at exit of a basic block.
SmallVector<int, 8> SPState;
- SPState.resize(Fn.getNumBlockIDs());
+ SPState.resize(MF.getNumBlockIDs());
df_iterator_default_set<MachineBasicBlock*> Reachable;
// Iterate over the reachable blocks in DFS order.
- for (auto DFI = df_ext_begin(&Fn, Reachable), DFE = df_ext_end(&Fn, Reachable);
+ for (auto DFI = df_ext_begin(&MF, Reachable), DFE = df_ext_end(&MF, Reachable);
DFI != DFE; ++DFI) {
int SPAdj = 0;
// Check the exit state of the DFS stack predecessor.
@@ -1021,27 +1018,27 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
SPAdj = SPState[StackPred->getNumber()];
}
MachineBasicBlock *BB = *DFI;
- replaceFrameIndices(BB, Fn, SPAdj);
+ replaceFrameIndices(BB, MF, SPAdj);
SPState[BB->getNumber()] = SPAdj;
}
// Handle the unreachable blocks.
- for (auto &BB : Fn) {
+ for (auto &BB : MF) {
if (Reachable.count(&BB))
// Already handled in DFS traversal.
continue;
int SPAdj = 0;
- replaceFrameIndices(&BB, Fn, SPAdj);
+ replaceFrameIndices(&BB, MF, SPAdj);
}
}
-void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
+void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
int &SPAdj) {
- assert(Fn.getSubtarget().getRegisterInfo() &&
+ assert(MF.getSubtarget().getRegisterInfo() &&
"getRegisterInfo() must be implemented!");
- const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
- const TargetRegisterInfo &TRI = *Fn.getSubtarget().getRegisterInfo();
- const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
if (RS && FrameIndexEliminationScavenging)
RS->enterBasicBlock(*BB);
@@ -1052,7 +1049,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
if (TII.isFrameInstr(*I)) {
InsideCallSequence = TII.isFrameSetup(*I);
SPAdj += TII.getSPAdjust(*I);
- I = TFI->eliminateCallFramePseudoInstr(Fn, *BB, I);
+ I = TFI->eliminateCallFramePseudoInstr(MF, *BB, I);
continue;
}
@@ -1071,8 +1068,9 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
"operand of a DBG_VALUE machine instruction");
unsigned Reg;
int64_t Offset =
- TFI->getFrameIndexReference(Fn, MI.getOperand(0).getIndex(), Reg);
+ TFI->getFrameIndexReference(MF, MI.getOperand(0).getIndex(), Reg);
MI.getOperand(0).ChangeToRegister(Reg, false /*isDef*/);
+ MI.getOperand(0).setIsDebug();
auto *DIExpr = DIExpression::prepend(MI.getDebugExpression(),
DIExpression::NoDeref, Offset);
MI.getOperand(3).setMetadata(DIExpr);
@@ -1091,7 +1089,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
unsigned Reg;
MachineOperand &Offset = MI.getOperand(i + 1);
int refOffset = TFI->getFrameIndexReferencePreferSP(
- Fn, MI.getOperand(i).getIndex(), Reg, /*IgnoreSPUpdates*/ false);
+ MF, MI.getOperand(i).getIndex(), Reg, /*IgnoreSPUpdates*/ false);
Offset.setImm(Offset.getImm() + refOffset);
MI.getOperand(i).ChangeToRegister(Reg, false /*isDef*/);
continue;
diff --git a/contrib/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/contrib/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
new file mode 100644
index 000000000000..050fef5d25ed
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -0,0 +1,195 @@
+//===---- ReachingDefAnalysis.cpp - Reaching Def Analysis ---*- C++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ReachingDefAnalysis.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "reaching-deps-analysis"
+
+char ReachingDefAnalysis::ID = 0;
+INITIALIZE_PASS(ReachingDefAnalysis, DEBUG_TYPE, "ReachingDefAnalysis", false,
+ true)
+
+void ReachingDefAnalysis::enterBasicBlock(
+ const LoopTraversal::TraversedMBBInfo &TraversedMBB) {
+
+ MachineBasicBlock *MBB = TraversedMBB.MBB;
+ unsigned MBBNumber = MBB->getNumber();
+ assert(MBBNumber < MBBReachingDefs.size() &&
+ "Unexpected basic block number.");
+ MBBReachingDefs[MBBNumber].resize(NumRegUnits);
+
+ // Reset instruction counter in each basic block.
+ CurInstr = 0;
+
+ // Set up LiveRegs to represent registers entering MBB.
+ // Default values are 'nothing happened a long time ago'.
+ if (LiveRegs.empty())
+ LiveRegs.assign(NumRegUnits, ReachingDefDefaultVal);
+
+ // This is the entry block.
+ if (MBB->pred_empty()) {
+ for (const auto &LI : MBB->liveins()) {
+ for (MCRegUnitIterator Unit(LI.PhysReg, TRI); Unit.isValid(); ++Unit) {
+ // Treat function live-ins as if they were defined just before the first
+ // instruction. Usually, function arguments are set up immediately
+ // before the call.
+ LiveRegs[*Unit] = -1;
+ MBBReachingDefs[MBBNumber][*Unit].push_back(LiveRegs[*Unit]);
+ }
+ }
+ LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << ": entry\n");
+ return;
+ }
+
+ // Try to coalesce live-out registers from predecessors.
+ for (MachineBasicBlock *pred : MBB->predecessors()) {
+ assert(unsigned(pred->getNumber()) < MBBOutRegsInfos.size() &&
+ "Should have pre-allocated MBBInfos for all MBBs");
+ const LiveRegsDefInfo &Incoming = MBBOutRegsInfos[pred->getNumber()];
+    // Incoming is empty if this is a backedge from a BB
+    // we haven't processed yet.
+ if (Incoming.empty())
+ continue;
+
+ for (unsigned Unit = 0; Unit != NumRegUnits; ++Unit) {
+ // Use the most recent predecessor def for each register.
+ LiveRegs[Unit] = std::max(LiveRegs[Unit], Incoming[Unit]);
+ if ((LiveRegs[Unit] != ReachingDefDefaultVal))
+ MBBReachingDefs[MBBNumber][Unit].push_back(LiveRegs[Unit]);
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << printMBBReference(*MBB)
+ << (!TraversedMBB.IsDone ? ": incomplete\n"
+ : ": all preds known\n"));
+}
+
+void ReachingDefAnalysis::leaveBasicBlock(
+ const LoopTraversal::TraversedMBBInfo &TraversedMBB) {
+ assert(!LiveRegs.empty() && "Must enter basic block first.");
+ unsigned MBBNumber = TraversedMBB.MBB->getNumber();
+ assert(MBBNumber < MBBOutRegsInfos.size() &&
+ "Unexpected basic block number.");
+ // Save register clearances at end of MBB - used by enterBasicBlock().
+ MBBOutRegsInfos[MBBNumber] = LiveRegs;
+
+ // While processing the basic block, we kept `Def` relative to the start
+ // of the basic block for convenience. However, future use of this information
+ // only cares about the clearance from the end of the block, so adjust
+ // everything to be relative to the end of the basic block.
+ for (int &OutLiveReg : MBBOutRegsInfos[MBBNumber])
+ OutLiveReg -= CurInstr;
+ LiveRegs.clear();
+}
+
+void ReachingDefAnalysis::processDefs(MachineInstr *MI) {
+ assert(!MI->isDebugInstr() && "Won't process debug instructions");
+
+ unsigned MBBNumber = MI->getParent()->getNumber();
+ assert(MBBNumber < MBBReachingDefs.size() &&
+ "Unexpected basic block number.");
+ const MCInstrDesc &MCID = MI->getDesc();
+ for (unsigned i = 0,
+ e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();
+ i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ if (MO.isUse())
+ continue;
+ for (MCRegUnitIterator Unit(MO.getReg(), TRI); Unit.isValid(); ++Unit) {
+ // This instruction explicitly defines the current reg unit.
+ LLVM_DEBUG(dbgs() << printReg(MO.getReg(), TRI) << ":\t" << CurInstr
+ << '\t' << *MI);
+
+ // How many instructions since this reg unit was last written?
+ LiveRegs[*Unit] = CurInstr;
+ MBBReachingDefs[MBBNumber][*Unit].push_back(CurInstr);
+ }
+ }
+ InstIds[MI] = CurInstr;
+ ++CurInstr;
+}
+
+void ReachingDefAnalysis::processBasicBlock(
+ const LoopTraversal::TraversedMBBInfo &TraversedMBB) {
+ enterBasicBlock(TraversedMBB);
+ for (MachineInstr &MI : *TraversedMBB.MBB) {
+ if (!MI.isDebugInstr())
+ processDefs(&MI);
+ }
+ leaveBasicBlock(TraversedMBB);
+}
+
+bool ReachingDefAnalysis::runOnMachineFunction(MachineFunction &mf) {
+ if (skipFunction(mf.getFunction()))
+ return false;
+ MF = &mf;
+ TRI = MF->getSubtarget().getRegisterInfo();
+
+ LiveRegs.clear();
+ NumRegUnits = TRI->getNumRegUnits();
+
+ MBBReachingDefs.resize(mf.getNumBlockIDs());
+
+ LLVM_DEBUG(dbgs() << "********** REACHING DEFINITION ANALYSIS **********\n");
+
+ // Initialize the MBBOutRegsInfos
+ MBBOutRegsInfos.resize(mf.getNumBlockIDs());
+
+ // Traverse the basic blocks.
+ LoopTraversal Traversal;
+ LoopTraversal::TraversalOrder TraversedMBBOrder = Traversal.traverse(mf);
+ for (LoopTraversal::TraversedMBBInfo TraversedMBB : TraversedMBBOrder) {
+ processBasicBlock(TraversedMBB);
+ }
+
+  // Sort all reaching defs found for a certain reg unit in a given BB.
+ for (MBBDefsInfo &MBBDefs : MBBReachingDefs) {
+ for (MBBRegUnitDefs &RegUnitDefs : MBBDefs)
+ llvm::sort(RegUnitDefs.begin(), RegUnitDefs.end());
+ }
+
+ return false;
+}
+
+void ReachingDefAnalysis::releaseMemory() {
+ // Clear the internal vectors.
+ MBBOutRegsInfos.clear();
+ MBBReachingDefs.clear();
+ InstIds.clear();
+}
+
+int ReachingDefAnalysis::getReachingDef(MachineInstr *MI, int PhysReg) {
+  assert(InstIds.count(MI) && "Unexpected machine instruction.");
+ int InstId = InstIds[MI];
+ int DefRes = ReachingDefDefaultVal;
+ unsigned MBBNumber = MI->getParent()->getNumber();
+ assert(MBBNumber < MBBReachingDefs.size() &&
+ "Unexpected basic block number.");
+ int LatestDef = ReachingDefDefaultVal;
+ for (MCRegUnitIterator Unit(PhysReg, TRI); Unit.isValid(); ++Unit) {
+ for (int Def : MBBReachingDefs[MBBNumber][*Unit]) {
+ if (Def >= InstId)
+ break;
+ DefRes = Def;
+ }
+ LatestDef = std::max(LatestDef, DefRes);
+ }
+ return LatestDef;
+}
+
+int ReachingDefAnalysis::getClearance(MachineInstr *MI, MCPhysReg PhysReg) {
+  assert(InstIds.count(MI) && "Unexpected machine instruction.");
+ return InstIds[MI] - getReachingDef(MI, PhysReg);
+}
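The new analysis numbers the instructions of each basic block and records, per register unit, the indices at which the unit was defined; getReachingDef returns the closest def index before a given instruction and getClearance the distance to it. A hedged sketch of how a client machine-function pass might consume it follows — the client pass itself and the queried register number are hypothetical, while the ReachingDefAnalysis calls are the ones declared in this file.

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/ReachingDefAnalysis.h"

using namespace llvm;

namespace {
// Hypothetical consumer, shown only to illustrate the query interface.
struct ClearanceChecker : public MachineFunctionPass {
  static char ID;
  ClearanceChecker() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<ReachingDefAnalysis>();
    AU.setPreservesAll();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    ReachingDefAnalysis &RDA = getAnalysis<ReachingDefAnalysis>();
    for (MachineBasicBlock &MBB : MF)
      for (MachineInstr &MI : MBB) {
        // Debug instructions are not numbered by the analysis, so skip them.
        if (MI.isDebugInstr())
          continue;
        // How many instructions since physical register 1 (a placeholder
        // register number) was last written before MI?
        int Clearance = RDA.getClearance(&MI, /*PhysReg=*/1);
        (void)Clearance;
      }
    return false; // analysis client only; the function is not modified
  }
};
} // end anonymous namespace

char ClearanceChecker::ID = 0;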
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp
index 74c1592634aa..bc28a054c680 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp
@@ -91,7 +91,7 @@ void RegAllocBase::allocatePhysRegs() {
// Unused registers can appear when the spiller coalesces snippets.
if (MRI->reg_nodbg_empty(VirtReg->reg)) {
- DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n');
+ LLVM_DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n');
aboutToRemoveInterval(*VirtReg);
LIS->removeInterval(VirtReg->reg);
continue;
@@ -103,9 +103,9 @@ void RegAllocBase::allocatePhysRegs() {
// selectOrSplit requests the allocator to return an available physical
// register if possible and populate a list of new live intervals that
// result from splitting.
- DEBUG(dbgs() << "\nselectOrSplit "
- << TRI->getRegClassName(MRI->getRegClass(VirtReg->reg))
- << ':' << *VirtReg << " w=" << VirtReg->weight << '\n');
+ LLVM_DEBUG(dbgs() << "\nselectOrSplit "
+ << TRI->getRegClassName(MRI->getRegClass(VirtReg->reg))
+ << ':' << *VirtReg << " w=" << VirtReg->weight << '\n');
using VirtRegVec = SmallVector<unsigned, 4>;
@@ -145,12 +145,12 @@ void RegAllocBase::allocatePhysRegs() {
assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned");
if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) {
assert(SplitVirtReg->empty() && "Non-empty but used interval");
- DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n');
+ LLVM_DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n');
aboutToRemoveInterval(*SplitVirtReg);
LIS->removeInterval(SplitVirtReg->reg);
continue;
}
- DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n");
+ LLVM_DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n");
assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) &&
"expect split value in virtual register");
enqueue(SplitVirtReg);
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
index 1125d2c62bef..daeff3fc3963 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -219,8 +219,8 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
Intfs.push_back(Intf);
}
}
- DEBUG(dbgs() << "spilling " << printReg(PhysReg, TRI)
- << " interferences with " << VirtReg << "\n");
+ LLVM_DEBUG(dbgs() << "spilling " << printReg(PhysReg, TRI)
+ << " interferences with " << VirtReg << "\n");
assert(!Intfs.empty() && "expected interference");
// Spill each interfering vreg allocated to PhysReg or an alias.
@@ -292,7 +292,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
}
// No other spill candidates were found, so spill the current VirtReg.
- DEBUG(dbgs() << "spilling: " << VirtReg << '\n');
+ LLVM_DEBUG(dbgs() << "spilling: " << VirtReg << '\n');
if (!VirtReg.isSpillable())
return ~0u;
LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM, this, &DeadRemats);
@@ -304,9 +304,8 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
}
bool RABasic::runOnMachineFunction(MachineFunction &mf) {
- DEBUG(dbgs() << "********** BASIC REGISTER ALLOCATION **********\n"
- << "********** Function: "
- << mf.getName() << '\n');
+ LLVM_DEBUG(dbgs() << "********** BASIC REGISTER ALLOCATION **********\n"
+ << "********** Function: " << mf.getName() << '\n');
MF = &mf;
RegAllocBase::init(getAnalysis<VirtRegMap>(),
@@ -323,7 +322,7 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) {
postOptimization();
// Diagnostic output before rewriting
- DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n");
+ LLVM_DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n");
releaseMemory();
return true;
diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
index 17d9492d942e..7b57c6cbcdb8 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -179,7 +179,7 @@ namespace {
}
private:
- bool runOnMachineFunction(MachineFunction &Fn) override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
void allocateBasicBlock(MachineBasicBlock &MBB);
void handleThroughOperands(MachineInstr &MI,
SmallVectorImpl<unsigned> &VirtDead);
@@ -206,7 +206,7 @@ namespace {
return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
}
- LiveRegMap::iterator assignVirtToPhysReg(unsigned VReg, MCPhysReg PhysReg);
+ LiveRegMap::iterator assignVirtToPhysReg(unsigned VirtReg, MCPhysReg PhysReg);
LiveRegMap::iterator allocVirtReg(MachineInstr &MI, LiveRegMap::iterator,
unsigned Hint);
LiveRegMap::iterator defineVirtReg(MachineInstr &MI, unsigned OpNum,
@@ -322,11 +322,11 @@ void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI,
// instruction, not on the spill.
bool SpillKill = MachineBasicBlock::iterator(LR.LastUse) != MI;
LR.Dirty = false;
- DEBUG(dbgs() << "Spilling " << printReg(LRI->VirtReg, TRI)
- << " in " << printReg(LR.PhysReg, TRI));
+ LLVM_DEBUG(dbgs() << "Spilling " << printReg(LRI->VirtReg, TRI) << " in "
+ << printReg(LR.PhysReg, TRI));
const TargetRegisterClass &RC = *MRI->getRegClass(LRI->VirtReg);
int FI = getStackSpaceFor(LRI->VirtReg, RC);
- DEBUG(dbgs() << " to stack slot #" << FI << "\n");
+ LLVM_DEBUG(dbgs() << " to stack slot #" << FI << "\n");
TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, &RC, TRI);
++NumStores; // Update statistics
@@ -339,7 +339,9 @@ void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI,
MachineInstr *NewDV = buildDbgValueForSpill(*MBB, MI, *DBG, FI);
assert(NewDV->getParent() == MBB && "dangling parent pointer");
(void)NewDV;
- DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV);
+ LLVM_DEBUG(dbgs() << "Inserting debug info due to spill:"
+ << "\n"
+ << *NewDV);
}
  // Now that this register is spilled, there should not be any DBG_VALUE
  // pointing to this register because they all point to the spilled value.
@@ -470,13 +472,14 @@ void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI,
}
}
-/// \brief Return the cost of spilling clearing out PhysReg and aliases so it is
+/// Return the cost of spilling to clear out PhysReg and aliases so it is
/// free for allocation. Returns 0 when PhysReg is free or disabled with all
/// aliases disabled - it can be allocated directly.
/// \returns spillImpossible when PhysReg or an alias can't be spilled.
unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
if (isRegUsedInInstr(PhysReg)) {
- DEBUG(dbgs() << printReg(PhysReg, TRI) << " is already used in instr.\n");
+ LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI)
+ << " is already used in instr.\n");
return spillImpossible;
}
switch (unsigned VirtReg = PhysRegState[PhysReg]) {
@@ -485,8 +488,8 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
case regFree:
return 0;
case regReserved:
- DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding "
- << printReg(PhysReg, TRI) << " is reserved already.\n");
+ LLVM_DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding "
+ << printReg(PhysReg, TRI) << " is reserved already.\n");
return spillImpossible;
default: {
LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg);
@@ -496,7 +499,7 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
}
// This is a disabled register, add up cost of aliases.
- DEBUG(dbgs() << printReg(PhysReg, TRI) << " is disabled.\n");
+ LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is disabled.\n");
unsigned Cost = 0;
for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
MCPhysReg Alias = *AI;
@@ -519,12 +522,12 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
return Cost;
}
-/// \brief This method updates local state so that we know that PhysReg is the
+/// This method updates local state so that we know that PhysReg is the
/// proper container for VirtReg now. The physical register must not be used
/// for anything else when this is called.
void RegAllocFast::assignVirtToPhysReg(LiveReg &LR, MCPhysReg PhysReg) {
- DEBUG(dbgs() << "Assigning " << printReg(LR.VirtReg, TRI) << " to "
- << printReg(PhysReg, TRI) << "\n");
+ LLVM_DEBUG(dbgs() << "Assigning " << printReg(LR.VirtReg, TRI) << " to "
+ << printReg(PhysReg, TRI) << "\n");
PhysRegState[PhysReg] = LR.VirtReg;
assert(!LR.PhysReg && "Already assigned a physreg");
LR.PhysReg = PhysReg;
@@ -570,16 +573,16 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::allocVirtReg(MachineInstr &MI,
}
}
- DEBUG(dbgs() << "Allocating " << printReg(VirtReg) << " from "
- << TRI->getRegClassName(&RC) << "\n");
+ LLVM_DEBUG(dbgs() << "Allocating " << printReg(VirtReg) << " from "
+ << TRI->getRegClassName(&RC) << "\n");
unsigned BestReg = 0;
unsigned BestCost = spillImpossible;
for (MCPhysReg PhysReg : AO) {
unsigned Cost = calcSpillCost(PhysReg);
- DEBUG(dbgs() << "\tRegister: " << printReg(PhysReg, TRI) << "\n");
- DEBUG(dbgs() << "\tCost: " << Cost << "\n");
- DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n");
+ LLVM_DEBUG(dbgs() << "\tRegister: " << printReg(PhysReg, TRI) << "\n");
+ LLVM_DEBUG(dbgs() << "\tCost: " << Cost << "\n");
+ LLVM_DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n");
// Cost is 0 when all aliases are already disabled.
if (Cost == 0) {
assignVirtToPhysReg(*LRI, PhysReg);
@@ -654,22 +657,22 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::reloadVirtReg(MachineInstr &MI,
LRI = allocVirtReg(MI, LRI, Hint);
const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
int FrameIndex = getStackSpaceFor(VirtReg, RC);
- DEBUG(dbgs() << "Reloading " << printReg(VirtReg, TRI) << " into "
- << printReg(LRI->PhysReg, TRI) << "\n");
+ LLVM_DEBUG(dbgs() << "Reloading " << printReg(VirtReg, TRI) << " into "
+ << printReg(LRI->PhysReg, TRI) << "\n");
TII->loadRegFromStackSlot(*MBB, MI, LRI->PhysReg, FrameIndex, &RC, TRI);
++NumLoads;
} else if (LRI->Dirty) {
if (isLastUseOfLocalReg(MO)) {
- DEBUG(dbgs() << "Killing last use: " << MO << "\n");
+ LLVM_DEBUG(dbgs() << "Killing last use: " << MO << "\n");
if (MO.isUse())
MO.setIsKill();
else
MO.setIsDead();
} else if (MO.isKill()) {
- DEBUG(dbgs() << "Clearing dubious kill: " << MO << "\n");
+ LLVM_DEBUG(dbgs() << "Clearing dubious kill: " << MO << "\n");
MO.setIsKill(false);
} else if (MO.isDead()) {
- DEBUG(dbgs() << "Clearing dubious dead: " << MO << "\n");
+ LLVM_DEBUG(dbgs() << "Clearing dubious dead: " << MO << "\n");
MO.setIsDead(false);
}
} else if (MO.isKill()) {
@@ -677,10 +680,10 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::reloadVirtReg(MachineInstr &MI,
// register would be killed immediately, and there might be a second use:
// %foo = OR killed %x, %x
// This would cause a second reload of %x into a different register.
- DEBUG(dbgs() << "Clearing clean kill: " << MO << "\n");
+ LLVM_DEBUG(dbgs() << "Clearing clean kill: " << MO << "\n");
MO.setIsKill(false);
} else if (MO.isDead()) {
- DEBUG(dbgs() << "Clearing clean dead: " << MO << "\n");
+ LLVM_DEBUG(dbgs() << "Clearing clean dead: " << MO << "\n");
MO.setIsDead(false);
}
assert(LRI->PhysReg && "Register not assigned");
@@ -699,13 +702,13 @@ bool RegAllocFast::setPhysReg(MachineInstr &MI, unsigned OpNum,
bool Dead = MO.isDead();
if (!MO.getSubReg()) {
MO.setReg(PhysReg);
- MO.setIsRenamableIfNoExtraRegAllocReq();
+ MO.setIsRenamable(true);
return MO.isKill() || Dead;
}
// Handle subregister index.
MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : 0);
- MO.setIsRenamableIfNoExtraRegAllocReq();
+ MO.setIsRenamable(true);
MO.setSubReg(0);
// A kill flag implies killing the full register. Add corresponding super
@@ -727,7 +730,7 @@ bool RegAllocFast::setPhysReg(MachineInstr &MI, unsigned OpNum,
// there are additional physreg defines.
void RegAllocFast::handleThroughOperands(MachineInstr &MI,
SmallVectorImpl<unsigned> &VirtDead) {
- DEBUG(dbgs() << "Scanning for through registers:");
+ LLVM_DEBUG(dbgs() << "Scanning for through registers:");
SmallSet<unsigned, 8> ThroughRegs;
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg()) continue;
@@ -737,13 +740,13 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
if (MO.isEarlyClobber() || (MO.isUse() && MO.isTied()) ||
(MO.getSubReg() && MI.readsVirtualRegister(Reg))) {
if (ThroughRegs.insert(Reg).second)
- DEBUG(dbgs() << ' ' << printReg(Reg));
+ LLVM_DEBUG(dbgs() << ' ' << printReg(Reg));
}
}
// If any physreg defines collide with preallocated through registers,
// we must spill and reallocate.
- DEBUG(dbgs() << "\nChecking for physdef collisions.\n");
+ LLVM_DEBUG(dbgs() << "\nChecking for physdef collisions.\n");
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.isDef()) continue;
unsigned Reg = MO.getReg();
@@ -756,7 +759,7 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
}
SmallVector<unsigned, 8> PartialDefs;
- DEBUG(dbgs() << "Allocating tied uses.\n");
+ LLVM_DEBUG(dbgs() << "Allocating tied uses.\n");
for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
const MachineOperand &MO = MI.getOperand(I);
if (!MO.isReg()) continue;
@@ -764,15 +767,16 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
if (MO.isUse()) {
if (!MO.isTied()) continue;
- DEBUG(dbgs() << "Operand " << I << "("<< MO << ") is tied to operand "
- << MI.findTiedOperandIdx(I) << ".\n");
+ LLVM_DEBUG(dbgs() << "Operand " << I << "(" << MO
+ << ") is tied to operand " << MI.findTiedOperandIdx(I)
+ << ".\n");
LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, 0);
MCPhysReg PhysReg = LRI->PhysReg;
setPhysReg(MI, I, PhysReg);
// Note: we don't update the def operand yet. That would cause the normal
// def-scan to attempt spilling.
} else if (MO.getSubReg() && MI.readsVirtualRegister(Reg)) {
- DEBUG(dbgs() << "Partial redefine: " << MO << "\n");
+ LLVM_DEBUG(dbgs() << "Partial redefine: " << MO << "\n");
// Reload the register, but don't assign to the operand just yet.
// That would confuse the later phys-def processing pass.
LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, 0);
@@ -780,7 +784,7 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
}
}
- DEBUG(dbgs() << "Allocating early clobbers.\n");
+ LLVM_DEBUG(dbgs() << "Allocating early clobbers.\n");
for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
const MachineOperand &MO = MI.getOperand(I);
if (!MO.isReg()) continue;
@@ -801,8 +805,8 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue;
unsigned Reg = MO.getReg();
if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
- DEBUG(dbgs() << "\tSetting " << printReg(Reg, TRI)
- << " as used in instr\n");
+ LLVM_DEBUG(dbgs() << "\tSetting " << printReg(Reg, TRI)
+ << " as used in instr\n");
markRegUsedInInstr(Reg);
}
@@ -848,7 +852,7 @@ void RegAllocFast::dumpState() {
void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
this->MBB = &MBB;
- DEBUG(dbgs() << "\nAllocating " << MBB);
+ LLVM_DEBUG(dbgs() << "\nAllocating " << MBB);
PhysRegState.assign(TRI->getNumRegs(), regDisabled);
assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?");
@@ -866,10 +870,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
// Otherwise, sequentially allocate each instruction in the MBB.
for (MachineInstr &MI : MBB) {
const MCInstrDesc &MCID = MI.getDesc();
- DEBUG(
- dbgs() << "\n>> " << MI << "Regs:";
- dumpState()
- );
+ LLVM_DEBUG(dbgs() << "\n>> " << MI << "Regs:"; dumpState());
// Debug values are not allowed to change codegen in any way.
if (MI.isDebugValue()) {
@@ -894,13 +895,13 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
if (SS != -1) {
// Modify DBG_VALUE now that the value is in a spill slot.
updateDbgValueForSpill(*DebugMI, SS);
- DEBUG(dbgs() << "Modifying debug info due to spill:"
- << "\t" << *DebugMI);
+ LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:"
+ << "\t" << *DebugMI);
continue;
}
// We can't allocate a physreg for a DebugValue, sorry!
- DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
+ LLVM_DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
MO.setReg(0);
}
@@ -910,6 +911,9 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
continue;
}
+ if (MI.isDebugLabel())
+ continue;
+
// If this is a copy, we may be able to coalesce.
unsigned CopySrcReg = 0;
unsigned CopyDstReg = 0;
@@ -1025,7 +1029,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
// as call-clobbered, this is not correct because some of those
// definitions may be used later on and we do not want to reuse
// those for virtual registers in between.
- DEBUG(dbgs() << " Spilling remaining registers before call.\n");
+ LLVM_DEBUG(dbgs() << " Spilling remaining registers before call.\n");
spillAll(MI);
}
@@ -1060,15 +1064,15 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
VirtDead.clear();
if (CopyDstReg && CopyDstReg == CopySrcReg && CopyDstSub == CopySrcSub) {
- DEBUG(dbgs() << "-- coalescing: " << MI);
+ LLVM_DEBUG(dbgs() << "-- coalescing: " << MI);
Coalesced.push_back(&MI);
} else {
- DEBUG(dbgs() << "<< " << MI);
+ LLVM_DEBUG(dbgs() << "<< " << MI);
}
}
// Spill all physical registers holding virtual registers now.
- DEBUG(dbgs() << "Spilling live registers at end of block.\n");
+ LLVM_DEBUG(dbgs() << "Spilling live registers at end of block.\n");
spillAll(MBB.getFirstTerminator());
// Erase all the coalesced copies. We are delaying it until now because
@@ -1077,13 +1081,13 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
MBB.erase(MI);
NumCopies += Coalesced.size();
- DEBUG(MBB.dump());
+ LLVM_DEBUG(MBB.dump());
}
/// Allocates registers for a function.
bool RegAllocFast::runOnMachineFunction(MachineFunction &MF) {
- DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n"
- << "********** Function: " << MF.getName() << '\n');
+ LLVM_DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n"
+ << "********** Function: " << MF.getName() << '\n');
MRI = &MF.getRegInfo();
const TargetSubtargetInfo &STI = MF.getSubtarget();
TRI = STI.getRegisterInfo();
diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
index e492c481a540..3333e1f2fb8b 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -125,6 +125,12 @@ static cl::opt<bool> EnableDeferredSpilling(
"variable because of other evicted variables."),
cl::init(false));
+static cl::opt<unsigned>
+ HugeSizeForSplit("huge-size-for-split", cl::Hidden,
+ cl::desc("A threshold of live range size which may cause "
+ "high compile time cost in global splitting."),
+ cl::init(5000));
+
// FIXME: Find a good default for this flag and remove the flag.
static cl::opt<unsigned>
CSRFirstTimeCost("regalloc-csr-first-time-cost",
@@ -292,7 +298,7 @@ class RAGreedy : public MachineFunctionPass,
public:
using EvictorInfo =
std::pair<unsigned /* evictor */, unsigned /* physreg */>;
- using EvicteeInfo = llvm::MapVector<unsigned /* evictee */, EvictorInfo>;
+ using EvicteeInfo = llvm::DenseMap<unsigned /* evictee */, EvictorInfo>;
private:
/// Each Vreg that has been evicted in the last stage of selectOrSplit will
@@ -300,28 +306,28 @@ class RAGreedy : public MachineFunctionPass,
EvicteeInfo Evictees;
public:
- /// \brief Clear all eviction information.
+ /// Clear all eviction information.
void clear() { Evictees.clear(); }
- /// \brief Clear eviction information for the given evictee Vreg.
+ /// Clear eviction information for the given evictee Vreg.
/// E.g. when Vreg get's a new allocation, the old eviction info is no
/// longer relevant.
/// \param Evictee The evictee Vreg for whom we want to clear collected
/// eviction info.
void clearEvicteeInfo(unsigned Evictee) { Evictees.erase(Evictee); }
- /// \brief Track new eviction.
+ /// Track new eviction.
/// The Evictor vreg has evicted the Evictee vreg from Physreg.
- /// \praram PhysReg The phisical register Evictee was evicted from.
- /// \praram Evictor The evictor Vreg that evicted Evictee.
- /// \praram Evictee The evictee Vreg.
+    /// \param PhysReg The physical register Evictee was evicted from.
+ /// \param Evictor The evictor Vreg that evicted Evictee.
+ /// \param Evictee The evictee Vreg.
void addEviction(unsigned PhysReg, unsigned Evictor, unsigned Evictee) {
Evictees[Evictee].first = Evictor;
Evictees[Evictee].second = PhysReg;
}
/// Return the Evictor Vreg which evicted Evictee Vreg from PhysReg.
- /// \praram Evictee The evictee vreg.
+ /// \param Evictee The evictee vreg.
/// \return The Evictor vreg which evicted Evictee vreg from PhysReg. 0 if
/// nobody has evicted Evictee from PhysReg.
EvictorInfo getEvictor(unsigned Evictee) {
@@ -399,7 +405,7 @@ class RAGreedy : public MachineFunctionPass,
/// obtained from the TargetSubtargetInfo.
bool EnableLocalReassign;
- /// Enable or not the the consideration of the cost of local intervals created
+ /// Enable or not the consideration of the cost of local intervals created
/// by a split candidate when choosing the best split candidate.
bool EnableAdvancedRASplitCost;
@@ -448,13 +454,16 @@ private:
bool splitCanCauseEvictionChain(unsigned Evictee, GlobalSplitCandidate &Cand,
unsigned BBNumber,
const AllocationOrder &Order);
+ bool splitCanCauseLocalSpill(unsigned VirtRegToSplit,
+ GlobalSplitCandidate &Cand, unsigned BBNumber,
+ const AllocationOrder &Order);
BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate &,
const AllocationOrder &Order,
bool *CanCauseEvictionChain);
bool calcCompactRegion(GlobalSplitCandidate&);
void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>);
void calcGapWeights(unsigned, SmallVectorImpl<float>&);
- unsigned canReassign(LiveInterval &VirtReg, unsigned PhysReg);
+ unsigned canReassign(LiveInterval &VirtReg, unsigned PrevReg);
bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool);
bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&);
bool canEvictInterferenceInRange(LiveInterval &VirtReg, unsigned PhysReg,
@@ -475,6 +484,7 @@ private:
SmallVectorImpl<unsigned>&, unsigned = ~0u);
unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
SmallVectorImpl<unsigned>&);
+ unsigned isSplitBenefitWorthCost(LiveInterval &VirtReg);
/// Calculate cost of region splitting.
unsigned calculateRegionSplitCost(LiveInterval &VirtReg,
AllocationOrder &Order,
@@ -763,7 +773,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
// preferred register.
if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg))
if (Order.isHint(Hint)) {
- DEBUG(dbgs() << "missed hint " << printReg(Hint, TRI) << '\n');
+ LLVM_DEBUG(dbgs() << "missed hint " << printReg(Hint, TRI) << '\n');
EvictionCost MaxCost;
MaxCost.setBrokenHints(1);
if (canEvictInterference(VirtReg, Hint, true, MaxCost)) {
@@ -782,8 +792,8 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
if (!Cost)
return PhysReg;
- DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost " << Cost
- << '\n');
+ LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost "
+ << Cost << '\n');
unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost);
return CheapReg ? CheapReg : PhysReg;
}
@@ -811,9 +821,9 @@ unsigned RAGreedy::canReassign(LiveInterval &VirtReg, unsigned PrevReg) {
break;
}
if (PhysReg)
- DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
- << printReg(PrevReg, TRI) << " to " << printReg(PhysReg, TRI)
- << '\n');
+ LLVM_DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
+ << printReg(PrevReg, TRI) << " to "
+ << printReg(PhysReg, TRI) << '\n');
return PhysReg;
}
@@ -840,7 +850,7 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
return true;
if (A.weight > B.weight) {
- DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight << '\n');
+ LLVM_DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight << '\n');
return true;
}
return false;
@@ -934,7 +944,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
return true;
}
-/// \brief Return true if all interferences between VirtReg and PhysReg between
+/// Return true if all interferences between VirtReg and PhysReg between
/// Start and End can be evicted.
///
/// \param VirtReg Live range that is about to be assigned.
@@ -986,7 +996,7 @@ bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg,
return true;
}
-/// \brief Return tthe physical register that will be best
+/// Return the physical register that will be best
/// candidate for eviction by a local split interval that will be created
/// between Start and End.
///
@@ -1032,8 +1042,8 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
if (!Cascade)
Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++;
- DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI)
- << " interference: Cascade " << Cascade << '\n');
+ LLVM_DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI)
+ << " interference: Cascade " << Cascade << '\n');
// Collect all interfering virtregs first.
SmallVector<LiveInterval*, 8> Intfs;
@@ -1104,8 +1114,8 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg);
unsigned MinCost = RegClassInfo.getMinCost(RC);
if (MinCost >= CostPerUseLimit) {
- DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = " << MinCost
- << ", no cheaper registers to be found.\n");
+ LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = "
+ << MinCost << ", no cheaper registers to be found.\n");
return 0;
}
@@ -1113,7 +1123,8 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
// the same cost. We don't need to look at them if they're too expensive.
if (TRI->getCostPerUse(Order.getOrder().back()) >= CostPerUseLimit) {
OrderLimit = RegClassInfo.getLastCostChange(RC);
- DEBUG(dbgs() << "Only trying the first " << OrderLimit << " regs.\n");
+ LLVM_DEBUG(dbgs() << "Only trying the first " << OrderLimit
+ << " regs.\n");
}
}
@@ -1124,9 +1135,10 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
// The first use of a callee-saved register in a function has cost 1.
// Don't start using a CSR when the CostPerUseLimit is low.
if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) {
- DEBUG(dbgs() << printReg(PhysReg, TRI) << " would clobber CSR "
- << printReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI)
- << '\n');
+ LLVM_DEBUG(
+ dbgs() << printReg(PhysReg, TRI) << " would clobber CSR "
+ << printReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI)
+ << '\n');
continue;
}
@@ -1313,7 +1325,7 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
// Perhaps iterating can enable more bundles?
SpillPlacer->iterate();
}
- DEBUG(dbgs() << ", v=" << Visited);
+ LLVM_DEBUG(dbgs() << ", v=" << Visited);
}
/// calcCompactRegion - Compute the set of edge bundles that should be live
@@ -1331,7 +1343,7 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
// Compact regions don't correspond to any physreg.
Cand.reset(IntfCache, 0);
- DEBUG(dbgs() << "Compact region bundles");
+ LLVM_DEBUG(dbgs() << "Compact region bundles");
// Use the spill placer to determine the live bundles. GrowRegion pretends
// that all the through blocks have interference when PhysReg is unset.
@@ -1340,7 +1352,7 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
// The static split cost will be zero since Cand.Intf reports no interference.
BlockFrequency Cost;
if (!addSplitConstraints(Cand.Intf, Cost)) {
- DEBUG(dbgs() << ", none.\n");
+ LLVM_DEBUG(dbgs() << ", none.\n");
return false;
}
@@ -1348,11 +1360,11 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
SpillPlacer->finish();
if (!Cand.LiveBundles.any()) {
- DEBUG(dbgs() << ", none.\n");
+ LLVM_DEBUG(dbgs() << ", none.\n");
return false;
}
- DEBUG({
+ LLVM_DEBUG({
for (int i : Cand.LiveBundles.set_bits())
dbgs() << " EB#" << i;
dbgs() << ".\n";
@@ -1378,7 +1390,7 @@ BlockFrequency RAGreedy::calcSpillCost() {
return Cost;
}
-/// \brief Check if splitting Evictee will create a local split interval in
+/// Check if splitting Evictee will create a local split interval in
/// basic block number BBNumber that may cause a bad eviction chain. This is
/// intended to prevent bad eviction sequences like:
/// movl %ebp, 8(%esp) # 4-byte Spill
@@ -1401,7 +1413,7 @@ BlockFrequency RAGreedy::calcSpillCost() {
/// Evictee %0 is intended for region splitting with split candidate
/// physreg0 (the reg %0 was evicted from).
/// Region splitting creates a local interval because of interference with the
-/// evictor %1 (normally region spliitting creates 2 interval, the "by reg"
+/// evictor %1 (normally region splitting creates 2 intervals, the "by reg"
/// and "by stack" intervals and local interval created when interference
/// occurs).
/// One of the split intervals ends up evicting %2 from physreg1.
@@ -1427,7 +1439,7 @@ BlockFrequency RAGreedy::calcSpillCost() {
/// we are splitting for and the interferences.
/// \param BBNumber The number of a BB for which the region split process will
/// create a local split interval.
-/// \param Order The phisical registers that may get evicted by a split
+/// \param Order The physical registers that may get evicted by a split
/// artifact of Evictee.
/// \return True if splitting Evictee may cause a bad eviction chain, false
/// otherwise.
@@ -1448,8 +1460,8 @@ bool RAGreedy::splitCanCauseEvictionChain(unsigned Evictee,
getCheapestEvicteeWeight(Order, LIS->getInterval(Evictee),
Cand.Intf.first(), Cand.Intf.last(), &MaxWeight);
- // The bad eviction chain occurs when either the split candidate the the
- // evited reg or one of the split artifact will evict the evicting reg.
+ // The bad eviction chain occurs when either the split candidate is the
+ // evicting reg or one of the split artifacts will evict the evicting reg.
if ((PhysReg != Cand.PhysReg) && (PhysReg != FutureEvictedPhysReg))
return false;
@@ -1479,6 +1491,54 @@ bool RAGreedy::splitCanCauseEvictionChain(unsigned Evictee,
return true;
}
+/// Check if splitting VirtRegToSplit will create a local split interval
+/// in basic block number BBNumber that may cause a spill.
+///
+/// \param VirtRegToSplit The register considered to be split.
+/// \param Cand The split candidate that determines the physical
+/// register we are splitting for and the interferences.
+/// \param BBNumber The number of a BB for which the region split process
+/// will create a local split interval.
+/// \param Order The physical registers that may get evicted by a
+/// split artifact of VirtRegToSplit.
+/// \return True if splitting VirtRegToSplit may cause a spill, false
+/// otherwise.
+bool RAGreedy::splitCanCauseLocalSpill(unsigned VirtRegToSplit,
+ GlobalSplitCandidate &Cand,
+ unsigned BBNumber,
+ const AllocationOrder &Order) {
+ Cand.Intf.moveToBlock(BBNumber);
+
+ // Check if the local interval will find a non-interfering assignment.
+ for (auto PhysReg : Order.getOrder()) {
+ if (!Matrix->checkInterference(Cand.Intf.first().getPrevIndex(),
+ Cand.Intf.last(), PhysReg))
+ return false;
+ }
+
+ // Check if the local interval will evict a cheaper interval.
+ float CheapestEvictWeight = 0;
+ unsigned FutureEvictedPhysReg = getCheapestEvicteeWeight(
+ Order, LIS->getInterval(VirtRegToSplit), Cand.Intf.first(),
+ Cand.Intf.last(), &CheapestEvictWeight);
+
+ // Have we found an interval that can be evicted?
+ if (FutureEvictedPhysReg) {
+ VirtRegAuxInfo VRAI(*MF, *LIS, VRM, getAnalysis<MachineLoopInfo>(), *MBFI);
+ float splitArtifactWeight =
+ VRAI.futureWeight(LIS->getInterval(VirtRegToSplit),
+ Cand.Intf.first().getPrevIndex(), Cand.Intf.last());
+ // Will the weight of the local interval be higher than the cheapest evictee
+ // weight? If so it will evict it and will not cause a spill.
+ if (splitArtifactWeight >= 0 && splitArtifactWeight > CheapestEvictWeight)
+ return false;
+ }
+
+ // The local interval is not able to find a non-interfering assignment and is
+ // not able to evict a less worthy interval, therefore it can cause a spill.
+ return true;
+}
+
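As a rough illustration of the decision made by the new splitCanCauseLocalSpill helper above, this standalone sketch (plain C++ with hypothetical names, not LLVM APIs) classifies a local split interval into the same three outcomes: it finds an interference-free register, it is heavy enough to evict the cheapest interfering interval, or it is expected to spill.

    // Hypothetical sketch mirroring splitCanCauseLocalSpill; the enum and
    // function names are illustrative only and do not exist in LLVM.
    enum class LocalIntervalFate { Assigned, Evicts, Spills };

    LocalIntervalFate classifyLocalInterval(bool HasInterferenceFreeReg,
                                            bool FoundEvictableInterval,
                                            float SplitArtifactWeight,
                                            float CheapestEvicteeWeight) {
      if (HasInterferenceFreeReg)
        return LocalIntervalFate::Assigned; // some physreg has no interference
      // A negative future weight means the weight could not be computed.
      if (FoundEvictableInterval && SplitArtifactWeight >= 0 &&
          SplitArtifactWeight > CheapestEvicteeWeight)
        return LocalIntervalFate::Evicts;   // heavier than the cheapest evictee
      return LocalIntervalFate::Spills;     // cannot be assigned and cannot evict
    }

splitCanCauseLocalSpill returns true exactly in the Spills case, which is when calcGlobalSplitCost charges the extra block frequency below.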
/// calcGlobalSplitCost - Return the global split cost of following the split
/// pattern in LiveBundles. This cost should be added to the local cost of the
/// interference pattern in SplitConstraints.
@@ -1499,19 +1559,26 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
Cand.Intf.moveToBlock(BC.Number);
// Check whether a local interval is going to be created during the region
- // split.
- if (EnableAdvancedRASplitCost && CanCauseEvictionChain &&
- Cand.Intf.hasInterference() && BI.LiveIn && BI.LiveOut && RegIn &&
- RegOut) {
-
- if (splitCanCauseEvictionChain(VirtRegToSplit, Cand, BC.Number, Order)) {
- // This interfernce cause our eviction from this assignment, we might
- // evict somebody else, add that cost.
+ // split. Calculate advanced split cost (cost of local intervals) if the
+ // option is enabled.
+ if (EnableAdvancedRASplitCost && Cand.Intf.hasInterference() && BI.LiveIn &&
+ BI.LiveOut && RegIn && RegOut) {
+
+ if (CanCauseEvictionChain &&
+ splitCanCauseEvictionChain(VirtRegToSplit, Cand, BC.Number, Order)) {
+ // This interference causes our eviction from this assignment, we might
+ // evict somebody else and eventually someone will spill, add that cost.
// See splitCanCauseEvictionChain for detailed description of scenarios.
GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
*CanCauseEvictionChain = true;
+
+ } else if (splitCanCauseLocalSpill(VirtRegToSplit, Cand, BC.Number,
+ Order)) {
+ // This interference causes the local interval to spill, so add that cost.
+ GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
+ GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
}
}
@@ -1540,7 +1607,7 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
// region split.
if (EnableAdvancedRASplitCost && CanCauseEvictionChain &&
splitCanCauseEvictionChain(VirtRegToSplit, Cand, Number, Order)) {
- // This interfernce cause our eviction from this assignment, we might
+ // This interference causes our eviction from this assignment, we might
// evict somebody else, add that cost.
// See splitCanCauseEvictionChain for detailed description of
// scenarios.
@@ -1575,7 +1642,8 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
// These are the intervals created for new global ranges. We may create more
// intervals for local ranges.
const unsigned NumGlobalIntvs = LREdit.size();
- DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs << " globals.\n");
+ LLVM_DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs
+ << " globals.\n");
assert(NumGlobalIntvs && "No global intervals configured");
// Isolate even single instructions when dealing with a proper sub-class.
@@ -1612,7 +1680,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
// Create separate intervals for isolated blocks with multiple uses.
if (!IntvIn && !IntvOut) {
- DEBUG(dbgs() << printMBBReference(*BI.MBB) << " isolated.\n");
+ LLVM_DEBUG(dbgs() << printMBBReference(*BI.MBB) << " isolated.\n");
if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
SE->splitSingleBlock(BI);
continue;
@@ -1694,8 +1762,8 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
// blocks is strictly decreasing.
if (IntvMap[i] < NumGlobalIntvs) {
if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
- DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
- << " blocks as original.\n");
+ LLVM_DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
+ << " blocks as original.\n");
// Don't allow repeated splitting as a safe guard against looping.
setStage(Reg, RS_Split2);
}
@@ -1710,8 +1778,21 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
MF->verify(this, "After splitting live range around region");
}
+// Global split has a high compile time cost especially for large live ranges.
+// Return false for the cases here where the potential benefit will never be
+// worth the cost.
+unsigned RAGreedy::isSplitBenefitWorthCost(LiveInterval &VirtReg) {
+ MachineInstr *MI = MRI->getUniqueVRegDef(VirtReg.reg);
+ if (MI && TII->isTriviallyReMaterializable(*MI, AA) &&
+ VirtReg.size() > HugeSizeForSplit)
+ return false;
+ return true;
+}
+
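The HugeSizeForSplit threshold used above is a command-line knob defined elsewhere in this file; a plausible definition is sketched below. The option string, description, and default value are assumptions for illustration, not taken from this diff.

    // Assumed declaration of the threshold consulted by isSplitBenefitWorthCost;
    // the flag name and default value here are illustrative guesses.
    static cl::opt<unsigned> HugeSizeForSplit(
        "huge-size-for-split", cl::Hidden,
        cl::desc("A threshold of live range size which may cause "
                 "high compile time cost in global splitting."),
        cl::init(5000));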
unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
SmallVectorImpl<unsigned> &NewVRegs) {
+ if (!isSplitBenefitWorthCost(VirtReg))
+ return 0;
unsigned NumCands = 0;
BlockFrequency SpillCost = calcSpillCost();
BlockFrequency BestCost;
@@ -1726,8 +1807,8 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// No benefit from the compact region, our fallback will be per-block
// splitting. Make sure we find a solution that is cheaper than spilling.
BestCost = SpillCost;
- DEBUG(dbgs() << "Cost of isolating all blocks = ";
- MBFI->printBlockFreq(dbgs(), BestCost) << '\n');
+ LLVM_DEBUG(dbgs() << "Cost of isolating all blocks = ";
+ MBFI->printBlockFreq(dbgs(), BestCost) << '\n');
}
bool CanCauseEvictionChain = false;
@@ -1790,13 +1871,13 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
SpillPlacer->prepare(Cand.LiveBundles);
BlockFrequency Cost;
if (!addSplitConstraints(Cand.Intf, Cost)) {
- DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n");
+ LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n");
continue;
}
- DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tstatic = ";
- MBFI->printBlockFreq(dbgs(), Cost));
+ LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tstatic = ";
+ MBFI->printBlockFreq(dbgs(), Cost));
if (Cost >= BestCost) {
- DEBUG({
+ LLVM_DEBUG({
if (BestCand == NoCand)
dbgs() << " worse than no bundles\n";
else
@@ -1811,15 +1892,15 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
// No live bundles, defer to splitSingleBlocks().
if (!Cand.LiveBundles.any()) {
- DEBUG(dbgs() << " no bundles.\n");
+ LLVM_DEBUG(dbgs() << " no bundles.\n");
continue;
}
bool HasEvictionChain = false;
Cost += calcGlobalSplitCost(Cand, Order, &HasEvictionChain);
- DEBUG({
- dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost)
- << " with bundles";
+ LLVM_DEBUG({
+ dbgs() << ", total = ";
+ MBFI->printBlockFreq(dbgs(), Cost) << " with bundles";
for (int i : Cand.LiveBundles.set_bits())
dbgs() << " EB#" << i;
dbgs() << ".\n";
@@ -1838,11 +1919,11 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
if (CanCauseEvictionChain && BestCand != NoCand) {
// See splitCanCauseEvictionChain for detailed description of bad
// eviction chain scenarios.
- DEBUG(dbgs() << "Best split candidate of vreg "
- << printReg(VirtReg.reg, TRI) << " may ");
+ LLVM_DEBUG(dbgs() << "Best split candidate of vreg "
+ << printReg(VirtReg.reg, TRI) << " may ");
if (!(*CanCauseEvictionChain))
- DEBUG(dbgs() << "not ");
- DEBUG(dbgs() << "cause bad eviction chain\n");
+ LLVM_DEBUG(dbgs() << "not ");
+ LLVM_DEBUG(dbgs() << "cause bad eviction chain\n");
}
return BestCand;
@@ -1865,8 +1946,8 @@ unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
if (unsigned B = Cand.getBundles(BundleCand, BestCand)) {
UsedCands.push_back(BestCand);
Cand.IntvIdx = SE->openIntv();
- DEBUG(dbgs() << "Split for " << printReg(Cand.PhysReg, TRI) << " in "
- << B << " bundles, intv " << Cand.IntvIdx << ".\n");
+ LLVM_DEBUG(dbgs() << "Split for " << printReg(Cand.PhysReg, TRI) << " in "
+ << B << " bundles, intv " << Cand.IntvIdx << ".\n");
(void)B;
}
}
@@ -1878,8 +1959,8 @@ unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
if (unsigned B = Cand.getBundles(BundleCand, 0)) {
UsedCands.push_back(0);
Cand.IntvIdx = SE->openIntv();
- DEBUG(dbgs() << "Split for compact region in " << B << " bundles, intv "
- << Cand.IntvIdx << ".\n");
+ LLVM_DEBUG(dbgs() << "Split for compact region in " << B
+ << " bundles, intv " << Cand.IntvIdx << ".\n");
(void)B;
}
}
@@ -1978,7 +2059,8 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
if (Uses.size() <= 1)
return 0;
- DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n");
+ LLVM_DEBUG(dbgs() << "Split around " << Uses.size()
+ << " individual instrs.\n");
const TargetRegisterClass *SuperRC =
TRI->getLargestLegalSuperClass(CurRC, *MF);
@@ -1993,7 +2075,7 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
SuperRCNumAllocatableRegs ==
getNumAllocatableRegsForConstraints(MI, VirtReg.reg, SuperRC, TII,
TRI, RCI)) {
- DEBUG(dbgs() << " skip:\t" << Uses[i] << '\t' << *MI);
+ LLVM_DEBUG(dbgs() << " skip:\t" << Uses[i] << '\t' << *MI);
continue;
}
SE->openIntv();
@@ -2003,7 +2085,7 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
}
if (LREdit.empty()) {
- DEBUG(dbgs() << "All uses were copies.\n");
+ LLVM_DEBUG(dbgs() << "All uses were copies.\n");
return 0;
}
@@ -2121,7 +2203,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
return 0;
const unsigned NumGaps = Uses.size()-1;
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "tryLocalSplit: ";
for (unsigned i = 0, e = Uses.size(); i != e; ++i)
dbgs() << ' ' << Uses[i];
@@ -2134,7 +2216,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
if (Matrix->checkRegMaskInterference(VirtReg)) {
// Get regmask slots for the whole block.
ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
- DEBUG(dbgs() << RMS.size() << " regmasks in block:");
+ LLVM_DEBUG(dbgs() << RMS.size() << " regmasks in block:");
// Constrain to VirtReg's live range.
unsigned ri = std::lower_bound(RMS.begin(), RMS.end(),
Uses.front().getRegSlot()) - RMS.begin();
@@ -2148,14 +2230,15 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// overlap the live range.
if (SlotIndex::isSameInstr(Uses[i+1], RMS[ri]) && i+1 == NumGaps)
break;
- DEBUG(dbgs() << ' ' << RMS[ri] << ':' << Uses[i] << '-' << Uses[i+1]);
+ LLVM_DEBUG(dbgs() << ' ' << RMS[ri] << ':' << Uses[i] << '-'
+ << Uses[i + 1]);
RegMaskGaps.push_back(i);
// Advance ri to the next gap. A regmask on one of the uses counts in
// both gaps.
while (ri != re && SlotIndex::isEarlierInstr(RMS[ri], Uses[i+1]))
++ri;
}
- DEBUG(dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << '\n');
}
// Since we allow local split results to be split again, there is a risk of
@@ -2214,13 +2297,12 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;
- DEBUG(dbgs() << printReg(PhysReg, TRI) << ' '
- << Uses[SplitBefore] << '-' << Uses[SplitAfter]
- << " i=" << MaxGap);
+ LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << ' ' << Uses[SplitBefore]
+ << '-' << Uses[SplitAfter] << " i=" << MaxGap);
// Stop before the interval gets so big we wouldn't be making progress.
if (!LiveBefore && !LiveAfter) {
- DEBUG(dbgs() << " all\n");
+ LLVM_DEBUG(dbgs() << " all\n");
break;
}
// Should the interval be extended or shrunk?
@@ -2245,12 +2327,12 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
1);
// Would this split be possible to allocate?
// Never allocate all gaps, we wouldn't be making progress.
- DEBUG(dbgs() << " w=" << EstWeight);
+ LLVM_DEBUG(dbgs() << " w=" << EstWeight);
if (EstWeight * Hysteresis >= MaxGap) {
Shrink = false;
float Diff = EstWeight - MaxGap;
if (Diff > BestDiff) {
- DEBUG(dbgs() << " (best)");
+ LLVM_DEBUG(dbgs() << " (best)");
BestDiff = Hysteresis * Diff;
BestBefore = SplitBefore;
BestAfter = SplitAfter;
@@ -2261,7 +2343,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// Try to shrink.
if (Shrink) {
if (++SplitBefore < SplitAfter) {
- DEBUG(dbgs() << " shrink\n");
+ LLVM_DEBUG(dbgs() << " shrink\n");
// Recompute the max when necessary.
if (GapWeight[SplitBefore - 1] >= MaxGap) {
MaxGap = GapWeight[SplitBefore];
@@ -2275,11 +2357,11 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// Try to extend the interval.
if (SplitAfter >= NumGaps) {
- DEBUG(dbgs() << " end\n");
+ LLVM_DEBUG(dbgs() << " end\n");
break;
}
- DEBUG(dbgs() << " extend\n");
+ LLVM_DEBUG(dbgs() << " extend\n");
MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]);
}
}
@@ -2288,9 +2370,9 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
if (BestBefore == NumGaps)
return 0;
- DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore]
- << '-' << Uses[BestAfter] << ", " << BestDiff
- << ", " << (BestAfter - BestBefore + 1) << " instrs\n");
+ LLVM_DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore] << '-'
+ << Uses[BestAfter] << ", " << BestDiff << ", "
+ << (BestAfter - BestBefore + 1) << " instrs\n");
LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
SE->reset(LREdit);
@@ -2310,14 +2392,14 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
if (NewGaps >= NumGaps) {
- DEBUG(dbgs() << "Tagging non-progress ranges: ");
+ LLVM_DEBUG(dbgs() << "Tagging non-progress ranges: ");
assert(!ProgressRequired && "Didn't make progress when it was required.");
for (unsigned i = 0, e = IntvMap.size(); i != e; ++i)
if (IntvMap[i] == 1) {
setStage(LIS->getInterval(LREdit.get(i)), RS_Split2);
- DEBUG(dbgs() << printReg(LREdit.get(i)));
+ LLVM_DEBUG(dbgs() << printReg(LREdit.get(i)));
}
- DEBUG(dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << '\n');
}
++NumLocalSplits;
@@ -2410,7 +2492,7 @@ RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,
// chances are one would not be recolorable.
if (Q.collectInterferingVRegs(LastChanceRecoloringMaxInterference) >=
LastChanceRecoloringMaxInterference && !ExhaustiveSearch) {
- DEBUG(dbgs() << "Early abort: too many interferences.\n");
+ LLVM_DEBUG(dbgs() << "Early abort: too many interferences.\n");
CutOffInfo |= CO_Interf;
return false;
}
@@ -2424,7 +2506,8 @@ RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,
MRI->getRegClass(Intf->reg) == CurRC) &&
!(hasTiedDef(MRI, VirtReg.reg) && !hasTiedDef(MRI, Intf->reg))) ||
FixedRegisters.count(Intf->reg)) {
- DEBUG(dbgs() << "Early abort: the interference is not recolorable.\n");
+ LLVM_DEBUG(
+ dbgs() << "Early abort: the interference is not recolorable.\n");
return false;
}
RecoloringCandidates.insert(Intf);
@@ -2477,7 +2560,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
SmallVectorImpl<unsigned> &NewVRegs,
SmallVirtRegSet &FixedRegisters,
unsigned Depth) {
- DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');
+ LLVM_DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');
// Ranges must be Done.
assert((getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
"Last chance recoloring should really be last chance");
@@ -2486,7 +2569,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
// for target with hundreds of registers.
// Indeed, in that case we may want to cut the search space earlier.
if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) {
- DEBUG(dbgs() << "Abort because max depth has been reached.\n");
+ LLVM_DEBUG(dbgs() << "Abort because max depth has been reached.\n");
CutOffInfo |= CO_Depth;
return ~0u;
}
@@ -2503,8 +2586,8 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
Order.rewind();
while (unsigned PhysReg = Order.next()) {
- DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
- << printReg(PhysReg, TRI) << '\n');
+ LLVM_DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
+ << printReg(PhysReg, TRI) << '\n');
RecoloringCandidates.clear();
VirtRegToPhysReg.clear();
CurrentNewVRegs.clear();
@@ -2512,7 +2595,8 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
// It is only possible to recolor virtual register interference.
if (Matrix->checkInterference(VirtReg, PhysReg) >
LiveRegMatrix::IK_VirtReg) {
- DEBUG(dbgs() << "Some interferences are not with virtual registers.\n");
+ LLVM_DEBUG(
+ dbgs() << "Some interferences are not with virtual registers.\n");
continue;
}
@@ -2521,7 +2605,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
// the interferences.
if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates,
FixedRegisters)) {
- DEBUG(dbgs() << "Some interferences cannot be recolored.\n");
+ LLVM_DEBUG(dbgs() << "Some interferences cannot be recolored.\n");
continue;
}
@@ -2535,7 +2619,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
unsigned ItVirtReg = (*It)->reg;
enqueue(RecoloringQueue, *It);
assert(VRM->hasPhys(ItVirtReg) &&
- "Interferences are supposed to be with allocated vairables");
+ "Interferences are supposed to be with allocated variables");
// Record the current allocation.
VirtRegToPhysReg[ItVirtReg] = VRM->getPhys(ItVirtReg);
@@ -2563,8 +2647,8 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
return PhysReg;
}
- DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to "
- << printReg(PhysReg, TRI) << '\n');
+ LLVM_DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to "
+ << printReg(PhysReg, TRI) << '\n');
// The recoloring attempt failed, undo the changes.
FixedRegisters = SaveFixedRegisters;
@@ -2611,7 +2695,7 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
unsigned Depth) {
while (!RecoloringQueue.empty()) {
LiveInterval *LI = dequeue(RecoloringQueue);
- DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
+ LLVM_DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
unsigned PhysReg;
PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1);
// When splitting happens, the live-range may actually be empty.
@@ -2623,11 +2707,12 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
if (!PhysReg) {
assert(LI->empty() && "Only empty live-range do not require a register");
- DEBUG(dbgs() << "Recoloring of " << *LI << " succeeded. Empty LI.\n");
+ LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
+ << " succeeded. Empty LI.\n");
continue;
}
- DEBUG(dbgs() << "Recoloring of " << *LI
- << " succeeded with: " << printReg(PhysReg, TRI) << '\n');
+ LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
+ << " succeeded with: " << printReg(PhysReg, TRI) << '\n');
Matrix->assign(*LI, PhysReg);
FixedRegisters.insert(LI->reg);
@@ -2735,7 +2820,7 @@ void RAGreedy::initializeCSRCost() {
CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry);
}
-/// \brief Collect the hint info for \p Reg.
+/// Collect the hint info for \p Reg.
/// The results are stored into \p Out.
/// \p Out is not cleared before being populated.
void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) {
@@ -2759,7 +2844,7 @@ void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) {
}
}
-/// \brief Using the given \p List, compute the cost of the broken hints if
+/// Using the given \p List, compute the cost of the broken hints if
/// \p PhysReg was used.
/// \return The cost of \p List for \p PhysReg.
BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List,
@@ -2772,7 +2857,7 @@ BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List,
return Cost;
}
-/// \brief Using the register assigned to \p VirtReg, try to recolor
+/// Using the register assigned to \p VirtReg, try to recolor
/// all the live ranges that are copy-related with \p VirtReg.
/// The recoloring is then propagated to all the live-ranges that have
/// been recolored and so on, until no more copies can be coalesced or
@@ -2794,8 +2879,8 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
Visited.insert(Reg);
RecoloringCandidates.push_back(Reg);
- DEBUG(dbgs() << "Trying to reconcile hints for: " << printReg(Reg, TRI) << '('
- << printReg(PhysReg, TRI) << ")\n");
+ LLVM_DEBUG(dbgs() << "Trying to reconcile hints for: " << printReg(Reg, TRI)
+ << '(' << printReg(PhysReg, TRI) << ")\n");
do {
Reg = RecoloringCandidates.pop_back_val();
@@ -2816,8 +2901,8 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
Matrix->checkInterference(LI, PhysReg)))
continue;
- DEBUG(dbgs() << printReg(Reg, TRI) << '(' << printReg(CurrPhys, TRI)
- << ") is recolorable.\n");
+ LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << '(' << printReg(CurrPhys, TRI)
+ << ") is recolorable.\n");
// Gather the hint info.
Info.clear();
@@ -2825,19 +2910,20 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
// Check if recoloring the live-range will increase the cost of the
// non-identity copies.
if (CurrPhys != PhysReg) {
- DEBUG(dbgs() << "Checking profitability:\n");
+ LLVM_DEBUG(dbgs() << "Checking profitability:\n");
BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys);
BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg);
- DEBUG(dbgs() << "Old Cost: " << OldCopiesCost.getFrequency()
- << "\nNew Cost: " << NewCopiesCost.getFrequency() << '\n');
+ LLVM_DEBUG(dbgs() << "Old Cost: " << OldCopiesCost.getFrequency()
+ << "\nNew Cost: " << NewCopiesCost.getFrequency()
+ << '\n');
if (OldCopiesCost < NewCopiesCost) {
- DEBUG(dbgs() << "=> Not profitable.\n");
+ LLVM_DEBUG(dbgs() << "=> Not profitable.\n");
continue;
}
// At this point, the cost is either cheaper or equal. If it is
// equal, we consider this is profitable because it may expose
// more recoloring opportunities.
- DEBUG(dbgs() << "=> Profitable.\n");
+ LLVM_DEBUG(dbgs() << "=> Profitable.\n");
// Recolor the live-range.
Matrix->unassign(LI);
Matrix->assign(LI, PhysReg);
@@ -2851,7 +2937,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
} while (!RecoloringCandidates.empty());
}
-/// \brief Try to recolor broken hints.
+/// Try to recolor broken hints.
/// Broken hints may be repaired by recoloring when an evicted variable
/// freed up a register for a larger live-range.
/// Consider the following example:
@@ -2925,8 +3011,8 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
}
LiveRangeStage Stage = getStage(VirtReg);
- DEBUG(dbgs() << StageName[Stage]
- << " Cascade " << ExtraRegInfo[VirtReg.reg].Cascade << '\n');
+ LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade "
+ << ExtraRegInfo[VirtReg.reg].Cascade << '\n');
// Try to evict a less worthy live range, but only for ranges from the primary
// queue. The RS_Split ranges already failed to do this, and they should not
@@ -2955,7 +3041,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
// This gives a better picture of the interference to split around.
if (Stage < RS_Split) {
setStage(VirtReg, RS_Split);
- DEBUG(dbgs() << "wait for second round\n");
+ LLVM_DEBUG(dbgs() << "wait for second round\n");
NewVRegs.push_back(VirtReg.reg);
return 0;
}
@@ -2984,7 +3070,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
// We would need a deep integration with the spiller to do the
// right thing here. Anyway, that is still good for early testing.
setStage(VirtReg, RS_Memory);
- DEBUG(dbgs() << "Do as if this register is in memory\n");
+ LLVM_DEBUG(dbgs() << "Do as if this register is in memory\n");
NewVRegs.push_back(VirtReg.reg);
} else {
NamedRegionTimer T("spill", "Spiller", TimerGroupName,
@@ -3070,8 +3156,8 @@ void RAGreedy::reportNumberOfSplillsReloads(MachineLoop *L, unsigned &Reloads,
}
bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
- DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
- << "********** Function: " << mf.getName() << '\n');
+ LLVM_DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
+ << "********** Function: " << mf.getName() << '\n');
MF = &mf;
TRI = MF->getSubtarget().getRegisterInfo();
@@ -3106,7 +3192,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
calculateSpillWeightsAndHints(*LIS, mf, VRM, *Loops, *MBFI);
- DEBUG(LIS->dump());
+ LLVM_DEBUG(LIS->dump());
SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI));
diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
index 69a879701fae..c19001c8403d 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -62,6 +62,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -159,25 +160,25 @@ private:
/// always available for the remat of all the siblings of the original reg.
SmallPtrSet<MachineInstr *, 32> DeadRemats;
- /// \brief Finds the initial set of vreg intervals to allocate.
+ /// Finds the initial set of vreg intervals to allocate.
void findVRegIntervalsToAlloc(const MachineFunction &MF, LiveIntervals &LIS);
- /// \brief Constructs an initial graph.
+ /// Constructs an initial graph.
void initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, Spiller &VRegSpiller);
- /// \brief Spill the given VReg.
+ /// Spill the given VReg.
void spillVReg(unsigned VReg, SmallVectorImpl<unsigned> &NewIntervals,
MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM,
Spiller &VRegSpiller);
- /// \brief Given a solved PBQP problem maps this solution back to a register
+ /// Given a solved PBQP problem maps this solution back to a register
/// assignment.
bool mapPBQPToRegAlloc(const PBQPRAGraph &G,
const PBQP::Solution &Solution,
VirtRegMap &VRM,
Spiller &VRegSpiller);
- /// \brief Postprocessing before final spilling. Sets basic block "live in"
+ /// Postprocessing before final spilling. Sets basic block "live in"
/// variables.
void finalizeAlloc(MachineFunction &MF, LiveIntervals &LIS,
VirtRegMap &VRM) const;
@@ -187,7 +188,7 @@ private:
char RegAllocPBQP::ID = 0;
-/// @brief Set spill costs for each node in the PBQP reg-alloc graph.
+/// Set spill costs for each node in the PBQP reg-alloc graph.
class SpillCosts : public PBQPRAConstraint {
public:
void apply(PBQPRAGraph &G) override {
@@ -211,7 +212,7 @@ public:
}
};
-/// @brief Add interference edges between overlapping vregs.
+/// Add interference edges between overlapping vregs.
class Interference : public PBQPRAConstraint {
private:
using AllowedRegVecPtr = const PBQP::RegAlloc::AllowedRegVector *;
@@ -561,16 +562,7 @@ void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction &MF,
unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
if (MRI.reg_nodbg_empty(Reg))
continue;
- LiveInterval &LI = LIS.getInterval(Reg);
-
- // If this live interval is non-empty we will use pbqp to allocate it.
- // Empty intervals we allocate in a simple post-processing stage in
- // finalizeAlloc.
- if (!LI.empty()) {
- VRegsToAlloc.insert(LI.reg);
- } else {
- EmptyIntervalVRegs.insert(LI.reg);
- }
+ VRegsToAlloc.insert(Reg);
}
}
@@ -594,13 +586,24 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM,
std::vector<unsigned> Worklist(VRegsToAlloc.begin(), VRegsToAlloc.end());
+ std::map<unsigned, std::vector<unsigned>> VRegAllowedMap;
+
while (!Worklist.empty()) {
unsigned VReg = Worklist.back();
Worklist.pop_back();
- const TargetRegisterClass *TRC = MRI.getRegClass(VReg);
LiveInterval &VRegLI = LIS.getInterval(VReg);
+ // If this is an empty interval move it to the EmptyIntervalVRegs set then
+ // continue.
+ if (VRegLI.empty()) {
+ EmptyIntervalVRegs.insert(VRegLI.reg);
+ VRegsToAlloc.erase(VRegLI.reg);
+ continue;
+ }
+
+ const TargetRegisterClass *TRC = MRI.getRegClass(VReg);
+
// Record any overlaps with regmask operands.
BitVector RegMaskOverlaps;
LIS.checkRegMaskInterference(VRegLI, RegMaskOverlaps);
@@ -639,8 +642,22 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM,
spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller);
Worklist.insert(Worklist.end(), NewVRegs.begin(), NewVRegs.end());
continue;
+ } else
+ VRegAllowedMap[VReg] = std::move(VRegAllowed);
+ }
+
+ for (auto &KV : VRegAllowedMap) {
+ auto VReg = KV.first;
+
+ // Move empty intervals to the EmptyIntervalVReg set.
+ if (LIS.getInterval(VReg).empty()) {
+ EmptyIntervalVRegs.insert(VReg);
+ VRegsToAlloc.erase(VReg);
+ continue;
}
+ auto &VRegAllowed = KV.second;
+
PBQPRAGraph::RawVector NodeCosts(VRegAllowed.size() + 1, 0);
// Tweak cost of callee saved registers, as using them forces spilling and
@@ -668,8 +685,8 @@ void RegAllocPBQP::spillVReg(unsigned VReg,
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
(void)TRI;
- DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> SPILLED (Cost: "
- << LRE.getParent().weight << ", New vregs: ");
+ LLVM_DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> SPILLED (Cost: "
+ << LRE.getParent().weight << ", New vregs: ");
// Copy any newly inserted live intervals into the list of regs to
// allocate.
@@ -677,11 +694,11 @@ void RegAllocPBQP::spillVReg(unsigned VReg,
I != E; ++I) {
const LiveInterval &LI = LIS.getInterval(*I);
assert(!LI.empty() && "Empty spill range.");
- DEBUG(dbgs() << printReg(LI.reg, &TRI) << " ");
+ LLVM_DEBUG(dbgs() << printReg(LI.reg, &TRI) << " ");
VRegsToAlloc.insert(LI.reg);
}
- DEBUG(dbgs() << ")\n");
+ LLVM_DEBUG(dbgs() << ")\n");
}
bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G,
@@ -707,8 +724,8 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G,
if (AllocOption != PBQP::RegAlloc::getSpillOptionIdx()) {
unsigned PReg = G.getNodeMetadata(NId).getAllowedRegs()[AllocOption - 1];
- DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> "
- << TRI.getName(PReg) << "\n");
+ LLVM_DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> "
+ << TRI.getName(PReg) << "\n");
assert(PReg != 0 && "Invalid preg selected.");
VRM.assignVirt2Phys(VReg, PReg);
} else {
@@ -784,7 +801,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
MF.getRegInfo().freezeReservedRegs(MF);
- DEBUG(dbgs() << "PBQP Register Allocating for " << MF.getName() << "\n");
+ LLVM_DEBUG(dbgs() << "PBQP Register Allocating for " << MF.getName() << "\n");
// Allocator main loop:
//
@@ -819,7 +836,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
unsigned Round = 0;
while (!PBQPAllocComplete) {
- DEBUG(dbgs() << " PBQP Regalloc round " << Round << ":\n");
+ LLVM_DEBUG(dbgs() << " PBQP Regalloc round " << Round << ":\n");
PBQPRAGraph G(PBQPRAGraph::GraphMetadata(MF, LIS, MBFI));
initializeGraph(G, VRM, *VRegSpiller);
@@ -833,8 +850,8 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
".pbqpgraph";
std::error_code EC;
raw_fd_ostream OS(GraphFileName, EC, sys::fs::F_Text);
- DEBUG(dbgs() << "Dumping graph for round " << Round << " to \""
- << GraphFileName << "\"\n");
+ LLVM_DEBUG(dbgs() << "Dumping graph for round " << Round << " to \""
+ << GraphFileName << "\"\n");
G.dump(OS);
}
#endif
@@ -851,7 +868,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
VRegsToAlloc.clear();
EmptyIntervalVRegs.clear();
- DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << VRM << "\n");
+ LLVM_DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << VRM << "\n");
return true;
}
diff --git a/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp b/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
index f49ea25bbf35..f1c442ac38ae 100644
--- a/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
+++ b/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
@@ -36,11 +36,8 @@ using namespace llvm;
STATISTIC(NumCSROpt,
"Number of functions optimized for callee saved registers");
-namespace llvm {
-void initializeRegUsageInfoCollectorPass(PassRegistry &);
-}
-
namespace {
+
class RegUsageInfoCollector : public MachineFunctionPass {
public:
RegUsageInfoCollector() : MachineFunctionPass(ID) {
@@ -52,12 +49,21 @@ public:
return "Register Usage Information Collector Pass";
}
- void getAnalysisUsage(AnalysisUsage &AU) const override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<PhysicalRegisterUsageInfo>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
bool runOnMachineFunction(MachineFunction &MF) override;
+ // Call determineCalleeSaves and then also set the bits for subregs and
+ // fully saved superregs.
+ static void computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF);
+
static char ID;
};
+
} // end of anonymous namespace
char RegUsageInfoCollector::ID = 0;
@@ -72,36 +78,32 @@ FunctionPass *llvm::createRegUsageInfoCollector() {
return new RegUsageInfoCollector();
}
-void RegUsageInfoCollector::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<PhysicalRegisterUsageInfo>();
- AU.setPreservesAll();
- MachineFunctionPass::getAnalysisUsage(AU);
-}
-
bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo *MRI = &MF.getRegInfo();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
const TargetMachine &TM = MF.getTarget();
- DEBUG(dbgs() << " -------------------- " << getPassName()
- << " -------------------- \n");
- DEBUG(dbgs() << "Function Name : " << MF.getName() << "\n");
+ LLVM_DEBUG(dbgs() << " -------------------- " << getPassName()
+ << " -------------------- \n");
+ LLVM_DEBUG(dbgs() << "Function Name : " << MF.getName() << "\n");
std::vector<uint32_t> RegMask;
// Compute the size of the bit vector to represent all the registers.
// The bit vector is broken into 32-bit chunks, thus takes the ceil of
// the number of registers divided by 32 for the size.
- unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32;
- RegMask.resize(RegMaskSize, 0xFFFFFFFF);
+ unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
+ RegMask.resize(RegMaskSize, ~((uint32_t)0));
const Function &F = MF.getFunction();
- PhysicalRegisterUsageInfo *PRUI = &getAnalysis<PhysicalRegisterUsageInfo>();
+ PhysicalRegisterUsageInfo &PRUI = getAnalysis<PhysicalRegisterUsageInfo>();
+ PRUI.setTargetMachine(TM);
- PRUI->setTargetMachine(&TM);
+ LLVM_DEBUG(dbgs() << "Clobbered Registers: ");
- DEBUG(dbgs() << "Clobbered Registers: ");
+ BitVector SavedRegs;
+ computeCalleeSavedRegs(SavedRegs, MF);
const BitVector &UsedPhysRegsMask = MRI->getUsedPhysRegsMask();
auto SetRegAsDefined = [&RegMask] (unsigned Reg) {
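For reference, the regmask sized here is a plain array of 32-bit words with one bit per physical register, where a set bit means the register is preserved across the call. A minimal standalone sketch of that arithmetic (plain C++, not the MachineOperand helpers):

    #include <cstdint>
    #include <vector>

    // Every register starts out preserved (all bits set), mirroring
    // RegMask.resize(RegMaskSize, ~((uint32_t)0)) above.
    std::vector<uint32_t> makeAllPreservedMask(unsigned NumRegs) {
      unsigned Words = (NumRegs + 31) / 32; // ceil(NumRegs / 32)
      return std::vector<uint32_t>(Words, ~uint32_t(0));
    }

    // Clearing a register's bit marks it as clobbered, which is what the
    // SetRegAsDefined lambda above does for each defined register.
    void markClobbered(std::vector<uint32_t> &Mask, unsigned PhysReg) {
      Mask[PhysReg / 32] &= ~(uint32_t(1) << (PhysReg % 32));
    }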
@@ -110,42 +112,82 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
// Scan all the physical registers. When a register is defined in the current
// function set it and all the aliasing registers as defined in the regmask.
for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
- // If a register is in the UsedPhysRegsMask set then mark it as defined.
- // All it's aliases will also be in the set, so we can skip setting
- // as defined all the aliases here.
- if (UsedPhysRegsMask.test(PReg)) {
- SetRegAsDefined(PReg);
+ // Don't count registers that are saved and restored.
+ if (SavedRegs.test(PReg))
continue;
- }
// If a register is defined by an instruction mark it as defined together
- // with all it's aliases.
+ // with all its unsaved aliases.
if (!MRI->def_empty(PReg)) {
for (MCRegAliasIterator AI(PReg, TRI, true); AI.isValid(); ++AI)
- SetRegAsDefined(*AI);
+ if (!SavedRegs.test(*AI))
+ SetRegAsDefined(*AI);
+ continue;
}
+ // If a register is in the UsedPhysRegsMask set then mark it as defined.
+ // All clobbered aliases will also be in the set, so we can skip setting
+ // as defined all the aliases here.
+ if (UsedPhysRegsMask.test(PReg))
+ SetRegAsDefined(PReg);
}
- if (!TargetFrameLowering::isSafeForNoCSROpt(F)) {
- const uint32_t *CallPreservedMask =
- TRI->getCallPreservedMask(MF, F.getCallingConv());
- if (CallPreservedMask) {
- // Set callee saved register as preserved.
- for (unsigned i = 0; i < RegMaskSize; ++i)
- RegMask[i] = RegMask[i] | CallPreservedMask[i];
- }
- } else {
+ if (TargetFrameLowering::isSafeForNoCSROpt(F)) {
++NumCSROpt;
- DEBUG(dbgs() << MF.getName()
- << " function optimized for not having CSR.\n");
+ LLVM_DEBUG(dbgs() << MF.getName()
+ << " function optimized for not having CSR.\n");
}
for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg)
if (MachineOperand::clobbersPhysReg(&(RegMask[0]), PReg))
- DEBUG(dbgs() << printReg(PReg, TRI) << " ");
+ LLVM_DEBUG(dbgs() << printReg(PReg, TRI) << " ");
- DEBUG(dbgs() << " \n----------------------------------------\n");
+ LLVM_DEBUG(dbgs() << " \n----------------------------------------\n");
- PRUI->storeUpdateRegUsageInfo(&F, std::move(RegMask));
+ PRUI.storeUpdateRegUsageInfo(F, RegMask);
return false;
}
+
+void RegUsageInfoCollector::
+computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF) {
+ const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+
+ // Target will return the set of registers that it saves/restores as needed.
+ SavedRegs.clear();
+ TFI.determineCalleeSaves(MF, SavedRegs);
+
+ // Insert subregs.
+ const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF);
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ unsigned Reg = CSRegs[i];
+ if (SavedRegs.test(Reg))
+ for (MCSubRegIterator SR(Reg, &TRI, false); SR.isValid(); ++SR)
+ SavedRegs.set(*SR);
+ }
+
+ // Insert any register fully saved via subregisters.
+ for (unsigned PReg = 1, PRegE = TRI.getNumRegs(); PReg < PRegE; ++PReg) {
+ if (SavedRegs.test(PReg))
+ continue;
+
+ // Check if PReg is fully covered by its subregs.
+ bool CoveredBySubRegs = false;
+ for (const TargetRegisterClass *RC : TRI.regclasses())
+ if (RC->CoveredBySubRegs && RC->contains(PReg)) {
+ CoveredBySubRegs = true;
+ break;
+ }
+ if (!CoveredBySubRegs)
+ continue;
+
+ // Add PReg to SavedRegs if all subregs are saved.
+ bool AllSubRegsSaved = true;
+ for (MCSubRegIterator SR(PReg, &TRI, false); SR.isValid(); ++SR)
+ if (!SavedRegs.test(*SR)) {
+ AllSubRegsSaved = false;
+ break;
+ }
+ if (AllSubRegsSaved)
+ SavedRegs.set(PReg);
+ }
+}
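A generic sketch of the covering rule implemented above, using ordinary containers rather than the MC register iterators (names are illustrative): a register that is not itself on the callee-saved list still counts as saved when its class is marked CoveredBySubRegs and every one of its sub-registers is saved.

    #include <set>
    #include <vector>

    // Illustrative helper, not an LLVM API: a super register is fully covered
    // when it has sub-registers and all of them are already saved.
    bool coveredBySavedSubRegs(const std::vector<unsigned> &SubRegs,
                               const std::set<unsigned> &SavedRegs) {
      if (SubRegs.empty())
        return false;               // nothing available to cover it
      for (unsigned SubReg : SubRegs)
        if (!SavedRegs.count(SubReg))
          return false;             // one unsaved part spoils the coverage
      return true;
    }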
diff --git a/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp b/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp
index 5b12d00e126f..256de295821d 100644
--- a/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp
+++ b/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp
@@ -34,10 +34,6 @@
#include <map>
#include <string>
-namespace llvm {
-void initializeRegUsageInfoPropagationPassPass(PassRegistry &);
-}
-
using namespace llvm;
#define DEBUG_TYPE "ip-regalloc"
@@ -45,54 +41,56 @@ using namespace llvm;
#define RUIP_NAME "Register Usage Information Propagation"
namespace {
-class RegUsageInfoPropagationPass : public MachineFunctionPass {
+class RegUsageInfoPropagation : public MachineFunctionPass {
public:
- RegUsageInfoPropagationPass() : MachineFunctionPass(ID) {
+ RegUsageInfoPropagation() : MachineFunctionPass(ID) {
PassRegistry &Registry = *PassRegistry::getPassRegistry();
- initializeRegUsageInfoPropagationPassPass(Registry);
+ initializeRegUsageInfoPropagationPass(Registry);
}
StringRef getPassName() const override { return RUIP_NAME; }
bool runOnMachineFunction(MachineFunction &MF) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<PhysicalRegisterUsageInfo>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
static char ID;
private:
- static void setRegMask(MachineInstr &MI, const uint32_t *RegMask) {
+ static void setRegMask(MachineInstr &MI, ArrayRef<uint32_t> RegMask) {
+ assert(RegMask.size() ==
+ MachineOperand::getRegMaskSize(MI.getParent()->getParent()
+ ->getRegInfo().getTargetRegisterInfo()
+ ->getNumRegs())
+ && "expected register mask size");
for (MachineOperand &MO : MI.operands()) {
if (MO.isRegMask())
- MO.setRegMask(RegMask);
+ MO.setRegMask(RegMask.data());
}
}
};
+
} // end of anonymous namespace
-char RegUsageInfoPropagationPass::ID = 0;
-INITIALIZE_PASS_BEGIN(RegUsageInfoPropagationPass, "reg-usage-propagation",
+INITIALIZE_PASS_BEGIN(RegUsageInfoPropagation, "reg-usage-propagation",
RUIP_NAME, false, false)
INITIALIZE_PASS_DEPENDENCY(PhysicalRegisterUsageInfo)
-INITIALIZE_PASS_END(RegUsageInfoPropagationPass, "reg-usage-propagation",
+INITIALIZE_PASS_END(RegUsageInfoPropagation, "reg-usage-propagation",
RUIP_NAME, false, false)
-FunctionPass *llvm::createRegUsageInfoPropPass() {
- return new RegUsageInfoPropagationPass();
-}
-
-void RegUsageInfoPropagationPass::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<PhysicalRegisterUsageInfo>();
- AU.setPreservesAll();
- MachineFunctionPass::getAnalysisUsage(AU);
-}
+char RegUsageInfoPropagation::ID = 0;
// Assumes call instructions have a single reference to a function.
-static const Function *findCalledFunction(const Module &M, MachineInstr &MI) {
- for (MachineOperand &MO : MI.operands()) {
+static const Function *findCalledFunction(const Module &M,
+ const MachineInstr &MI) {
+ for (const MachineOperand &MO : MI.operands()) {
if (MO.isGlobal())
- return dyn_cast<Function>(MO.getGlobal());
+ return dyn_cast<const Function>(MO.getGlobal());
if (MO.isSymbol())
return M.getFunction(MO.getSymbolName());
@@ -101,13 +99,13 @@ static const Function *findCalledFunction(const Module &M, MachineInstr &MI) {
return nullptr;
}
-bool RegUsageInfoPropagationPass::runOnMachineFunction(MachineFunction &MF) {
- const Module *M = MF.getFunction().getParent();
+bool RegUsageInfoPropagation::runOnMachineFunction(MachineFunction &MF) {
+ const Module &M = *MF.getFunction().getParent();
PhysicalRegisterUsageInfo *PRUI = &getAnalysis<PhysicalRegisterUsageInfo>();
- DEBUG(dbgs() << " ++++++++++++++++++++ " << getPassName()
- << " ++++++++++++++++++++ \n");
- DEBUG(dbgs() << "MachineFunction : " << MF.getName() << "\n");
+ LLVM_DEBUG(dbgs() << " ++++++++++++++++++++ " << getPassName()
+ << " ++++++++++++++++++++ \n");
+ LLVM_DEBUG(dbgs() << "MachineFunction : " << MF.getName() << "\n");
const MachineFrameInfo &MFI = MF.getFrameInfo();
if (!MFI.hasCalls() && !MFI.hasTailCall())
@@ -119,30 +117,37 @@ bool RegUsageInfoPropagationPass::runOnMachineFunction(MachineFunction &MF) {
for (MachineInstr &MI : MBB) {
if (!MI.isCall())
continue;
- DEBUG(dbgs()
- << "Call Instruction Before Register Usage Info Propagation : \n");
- DEBUG(dbgs() << MI << "\n");
-
- auto UpdateRegMask = [&](const Function *F) {
- const auto *RegMask = PRUI->getRegUsageInfo(F);
- if (!RegMask)
+ LLVM_DEBUG(
+ dbgs()
+ << "Call Instruction Before Register Usage Info Propagation : \n");
+ LLVM_DEBUG(dbgs() << MI << "\n");
+
+ auto UpdateRegMask = [&](const Function &F) {
+ const ArrayRef<uint32_t> RegMask = PRUI->getRegUsageInfo(F);
+ if (RegMask.empty())
return;
- setRegMask(MI, &(*RegMask)[0]);
+ setRegMask(MI, RegMask);
Changed = true;
};
- if (const Function *F = findCalledFunction(*M, MI)) {
- UpdateRegMask(F);
+ if (const Function *F = findCalledFunction(M, MI)) {
+ UpdateRegMask(*F);
} else {
- DEBUG(dbgs() << "Failed to find call target function\n");
+ LLVM_DEBUG(dbgs() << "Failed to find call target function\n");
}
- DEBUG(dbgs() << "Call Instruction After Register Usage Info Propagation : "
- << MI << '\n');
+ LLVM_DEBUG(
+ dbgs() << "Call Instruction After Register Usage Info Propagation : "
+ << MI << '\n');
}
}
- DEBUG(dbgs() << " +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
- "++++++ \n");
+ LLVM_DEBUG(
+ dbgs() << " +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
+ "++++++ \n");
return Changed;
}
+
+FunctionPass *llvm::createRegUsageInfoPropPass() {
+ return new RegUsageInfoPropagation();
+}
diff --git a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp
index b0eeb81f583e..add8faec97d4 100644
--- a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp
@@ -49,9 +49,6 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
if (MF->getSubtarget().getRegisterInfo() != TRI) {
TRI = MF->getSubtarget().getRegisterInfo();
RegClass.reset(new RCInfo[TRI->getNumRegClasses()]);
- unsigned NumPSets = TRI->getNumRegPressureSets();
- PSetLimits.reset(new unsigned[NumPSets]);
- std::fill(&PSetLimits[0], &PSetLimits[NumPSets], 0);
Update = true;
}
@@ -80,8 +77,12 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
}
// Invalidate cached information from previous function.
- if (Update)
+ if (Update) {
+ unsigned NumPSets = TRI->getNumRegPressureSets();
+ PSetLimits.reset(new unsigned[NumPSets]);
+ std::fill(&PSetLimits[0], &PSetLimits[NumPSets], 0);
++Tag;
+ }
}
/// compute - Compute the preferred allocation order for RC with reserved
@@ -150,7 +151,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
RCI.MinCost = uint8_t(MinCost);
RCI.LastCostChange = LastCostChange;
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "AllocationOrder(" << TRI->getRegClassName(RC) << ") = [";
for (unsigned I = 0; I != RCI.NumRegs; ++I)
dbgs() << ' ' << printReg(RCI.Order[I], TRI);
diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 00a2e93c71ca..cad13a60efd2 100644
--- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -115,11 +115,11 @@ namespace {
/// checked for smaller live intervals.
bool ShrinkMainRange;
- /// \brief True if the coalescer should aggressively coalesce global copies
+ /// True if the coalescer should aggressively coalesce global copies
/// in favor of keeping local copies.
bool JoinGlobalCopies;
- /// \brief True if the coalescer should aggressively coalesce fall-thru
+ /// True if the coalescer should aggressively coalesce fall-thru
/// blocks exclusively containing copies.
bool JoinSplitEdges;
@@ -162,7 +162,7 @@ namespace {
/// was successfully coalesced away. If it is not currently possible to
/// coalesce this interval, but it may be possible if other things get
/// coalesced, then it returns true by reference in 'Again'.
- bool joinCopy(MachineInstr *TheCopy, bool &Again);
+ bool joinCopy(MachineInstr *CopyMI, bool &Again);
/// Attempt to join these two intervals. On failure, this
/// returns false. The output "SrcInt" will not have been modified, so we
@@ -233,9 +233,11 @@ namespace {
void addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
MachineOperand &MO, unsigned SubRegIdx);
- /// Handle copies of undef values.
- /// Returns true if @p CopyMI was a copy of an undef value and eliminated.
- bool eliminateUndefCopy(MachineInstr *CopyMI);
+ /// Handle copies of undef values. If the undef value is an incoming
+ /// PHI value, it will convert @p CopyMI to an IMPLICIT_DEF.
+ /// Returns nullptr if @p CopyMI was not in any way eliminable. Otherwise,
+ /// it returns @p CopyMI (which could be an IMPLICIT_DEF at this point).
+ MachineInstr *eliminateUndefCopy(MachineInstr *CopyMI);
/// Check whether or not we should apply the terminal rule on the
/// destination (Dst) of \p Copy.
@@ -568,7 +570,7 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
// in IntB, we can merge them.
if (ValS+1 != BS) return false;
- DEBUG(dbgs() << "Extending: " << printReg(IntB.reg, TRI));
+ LLVM_DEBUG(dbgs() << "Extending: " << printReg(IntB.reg, TRI));
SlotIndex FillerStart = ValS->end, FillerEnd = BS->start;
// We are about to delete CopyMI, so need to remove it as the 'instruction
@@ -587,6 +589,13 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
// Do the same for the subregister segments.
for (LiveInterval::SubRange &S : IntB.subranges()) {
+ // Check for SubRange Segments of the form [1234r,1234d:0) which can be
+ // removed to prevent creating bogus SubRange Segments.
+ LiveInterval::iterator SS = S.FindSegmentContaining(CopyIdx);
+ if (SS != S.end() && SlotIndex::isSameInstr(SS->start, SS->end)) {
+ S.removeSegment(*SS, true);
+ continue;
+ }
VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
S.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, SubBValNo));
VNInfo *SubValSNo = S.getVNInfoAt(AValNo->def.getPrevSlot());
@@ -594,7 +603,7 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
S.MergeValueNumberInto(SubBValNo, SubValSNo);
}
- DEBUG(dbgs() << " result = " << IntB << '\n');
+ LLVM_DEBUG(dbgs() << " result = " << IntB << '\n');
// If the source instruction was killing the source register before the
// merge, unset the isKill marker given the live range has been extended.
@@ -603,11 +612,21 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
ValSEndInst->getOperand(UIdx).setIsKill(false);
}
- // Rewrite the copy. If the copy instruction was killing the destination
- // register before the merge, find the last use and trim the live range. That
- // will also add the isKill marker.
+ // Rewrite the copy.
CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI);
- if (AS->end == CopyIdx)
+ // If the copy instruction was killing the destination register or any
+ // subrange before the merge trim the live range.
+ bool RecomputeLiveRange = AS->end == CopyIdx;
+ if (!RecomputeLiveRange) {
+ for (LiveInterval::SubRange &S : IntA.subranges()) {
+ LiveInterval::iterator SS = S.FindSegmentContaining(CopyUseIdx);
+ if (SS != S.end() && SS->end == CopyIdx) {
+ RecomputeLiveRange = true;
+ break;
+ }
+ }
+ }
+ if (RecomputeLiveRange)
shrinkToUses(&IntA);
++numExtends;
@@ -641,7 +660,7 @@ bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
return false;
}
-/// Copy segements with value number @p SrcValNo from liverange @p Src to live
+/// Copy segments with value number @p SrcValNo from liverange @p Src to live
/// range @Dst and use value number @p DstValNo there.
static void addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo,
const LiveRange &Src, const VNInfo *SrcValNo) {
@@ -742,8 +761,8 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
return false;
}
- DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t'
- << *DefMI);
+ LLVM_DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t'
+ << *DefMI);
// At this point we have decided that it is legal to do this
// transformation. Start by commuting the instruction.
@@ -812,7 +831,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);
if (!DVNI)
continue;
- DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
+ LLVM_DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
assert(DVNI->def == DefIdx);
BValNo = IntB.MergeValueNumberInto(DVNI, BValNo);
for (LiveInterval::SubRange &S : IntB.subranges()) {
@@ -853,11 +872,11 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
BValNo->def = AValNo->def;
addSegmentsWithValNo(IntB, BValNo, IntA, AValNo);
- DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
+ LLVM_DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
LIS->removeVRegDefAt(IntA, AValNo->def);
- DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n');
+ LLVM_DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n');
++numCommutes;
return true;
}
@@ -989,13 +1008,24 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
if (CopyLeftBB && CopyLeftBB->succ_size() > 1)
return false;
- // Now ok to move copy.
+ // Now (almost sure it's) ok to move copy.
if (CopyLeftBB) {
- DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to "
- << printMBBReference(*CopyLeftBB) << '\t' << CopyMI);
+ // Position in CopyLeftBB where we should insert new copy.
+ auto InsPos = CopyLeftBB->getFirstTerminator();
+
+ // Make sure that B isn't referenced in the terminators (if any) at the end
+ // of the predecessor since we're about to insert a new definition of B
+ // before them.
+ if (InsPos != CopyLeftBB->end()) {
+ SlotIndex InsPosIdx = LIS->getInstructionIndex(*InsPos).getRegSlot(true);
+ if (IntB.overlaps(InsPosIdx, LIS->getMBBEndIdx(CopyLeftBB)))
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to "
+ << printMBBReference(*CopyLeftBB) << '\t' << CopyMI);
// Insert new copy to CopyLeftBB.
- auto InsPos = CopyLeftBB->getFirstTerminator();
MachineInstr *NewCopyMI = BuildMI(*CopyLeftBB, InsPos, CopyMI.getDebugLoc(),
TII->get(TargetOpcode::COPY), IntB.reg)
.addReg(IntA.reg);
@@ -1010,8 +1040,8 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
// the deleted list.
ErasedInstrs.erase(NewCopyMI);
} else {
- DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from "
- << printMBBReference(MBB) << '\t' << CopyMI);
+ LLVM_DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from "
+ << printMBBReference(MBB) << '\t' << CopyMI);
}
// Remove CopyMI.
@@ -1039,6 +1069,8 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
BValNo->markUnused();
LIS->extendToIndices(SR, EndPoints);
}
+ // If any dead defs were extended, truncate them.
+ shrinkToUses(&IntB);
// Finally, update the live-range of IntA.
shrinkToUses(&IntA);
@@ -1174,7 +1206,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
I != E; ++I) {
MachineOperand &MO = CopyMI->getOperand(I);
if (MO.isReg()) {
- assert(MO.isImplicit() && "No explicit operands after implict operands.");
+ assert(MO.isImplicit() && "No explicit operands after implicit operands.");
// Discard VReg implicit defs.
if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
ImplicitOps.push_back(MO);
@@ -1220,6 +1252,11 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
// Update machine operands and add flags.
updateRegDefsUses(DstReg, DstReg, DstIdx);
NewMI.getOperand(0).setSubReg(NewIdx);
+ // updateRegDefUses can add an "undef" flag to the definition, since
+ // it will replace DstReg with DstReg.DstIdx. If NewIdx is 0, make
+ // sure that "undef" is not set.
+ if (NewIdx == 0)
+ NewMI.getOperand(0).setIsUndef(false);
// Add dead subregister definitions if we are defining the whole register
// but only part of it is live.
// This could happen if the rematerialization instruction is rematerializing
@@ -1266,8 +1303,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
bool UpdatedSubRanges = false;
for (LiveInterval::SubRange &SR : DstInt.subranges()) {
if ((SR.LaneMask & DstMask).none()) {
- DEBUG(dbgs() << "Removing undefined SubRange "
- << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
+ LLVM_DEBUG(dbgs()
+ << "Removing undefined SubRange "
+ << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
// VNI is in ValNo - remove any segments in this SubRange that have this ValNo
if (VNInfo *RmValNo = SR.getVNInfoAt(CurrIdx.getRegSlot())) {
SR.removeValNo(RmValNo);
@@ -1299,7 +1337,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
// %1 = somedef ; %1 GR8
// dead ECX = remat ; implicit-def CL
// = somedef %1 ; %1 GR8
- // %1 will see the inteferences with CL but not with CH since
+ // %1 will see the interferences with CL but not with CH since
// no live-ranges would have been created for ECX.
// Fix that!
SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
@@ -1324,7 +1362,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
}
- DEBUG(dbgs() << "Remat: " << NewMI);
+ LLVM_DEBUG(dbgs() << "Remat: " << NewMI);
++NumReMats;
// The source interval can become smaller because we removed a use.
@@ -1339,7 +1377,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
// Move the debug value directly after the def of the rematerialized
// value in DstReg.
MBB->splice(std::next(NewMI.getIterator()), UseMI->getParent(), UseMI);
- DEBUG(dbgs() << "\t\tupdated: " << *UseMI);
+ LLVM_DEBUG(dbgs() << "\t\tupdated: " << *UseMI);
}
}
eliminateDeadDefs();
@@ -1348,9 +1386,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
return true;
}
-bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
- // ProcessImpicitDefs may leave some copies of <undef> values, it only removes
- // local variables. When we have a copy like:
+MachineInstr *RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
+ // ProcessImplicitDefs may leave some copies of <undef> values, it only
+ // removes local variables. When we have a copy like:
//
// %1 = COPY undef %2
//
@@ -1372,16 +1410,34 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
if ((SR.LaneMask & SrcMask).none())
continue;
if (SR.liveAt(Idx))
- return false;
+ return nullptr;
}
} else if (SrcLI.liveAt(Idx))
- return false;
+ return nullptr;
- DEBUG(dbgs() << "\tEliminating copy of <undef> value\n");
-
- // Remove any DstReg segments starting at the instruction.
+ // If the undef copy defines a live-out value (i.e. an input to a PHI def),
+ // then replace it with an IMPLICIT_DEF.
LiveInterval &DstLI = LIS->getInterval(DstReg);
SlotIndex RegIndex = Idx.getRegSlot();
+ LiveRange::Segment *Seg = DstLI.getSegmentContaining(RegIndex);
+ assert(Seg != nullptr && "No segment for defining instruction");
+ if (VNInfo *V = DstLI.getVNInfoAt(Seg->end)) {
+ if (V->isPHIDef()) {
+ CopyMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
+ for (unsigned i = CopyMI->getNumOperands(); i != 0; --i) {
+ MachineOperand &MO = CopyMI->getOperand(i-1);
+ if (MO.isReg() && MO.isUse())
+ CopyMI->RemoveOperand(i-1);
+ }
+ LLVM_DEBUG(dbgs() << "\tReplaced copy of <undef> value with an "
+ "implicit def\n");
+ return CopyMI;
+ }
+ }
+
+ // Remove any DstReg segments starting at the instruction.
+ LLVM_DEBUG(dbgs() << "\tEliminating copy of <undef> value\n");
+
// Remove value or merge with previous one in case of a subregister def.
if (VNInfo *PrevVNI = DstLI.getVNInfoAt(Idx)) {
VNInfo *VNI = DstLI.getVNInfoAt(RegIndex);
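For reference, the new early-out above rewrites the undef copy in place as an IMPLICIT_DEF rather than deleting it, so the live-out value feeding the PHI keeps a defining instruction. A standalone sketch of that rewrite, assuming TII is the target's TargetInstrInfo (the helper name is invented for illustration):

    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/TargetInstrInfo.h"
    #include "llvm/CodeGen/TargetOpcodes.h"

    // Turn MI into an IMPLICIT_DEF of its def operand by dropping all uses.
    static void degradeToImplicitDef(llvm::MachineInstr &MI,
                                     const llvm::TargetInstrInfo &TII) {
      MI.setDesc(TII.get(llvm::TargetOpcode::IMPLICIT_DEF));
      // Walk operands backwards so removal does not shift pending indices.
      for (unsigned i = MI.getNumOperands(); i != 0; --i) {
        llvm::MachineOperand &MO = MI.getOperand(i - 1);
        if (MO.isReg() && MO.isUse())
          MI.RemoveOperand(i - 1);
      }
    }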
@@ -1424,7 +1480,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
if (isLive)
continue;
MO.setIsUndef(true);
- DEBUG(dbgs() << "\tnew undef: " << UseIdx << '\t' << MI);
+ LLVM_DEBUG(dbgs() << "\tnew undef: " << UseIdx << '\t' << MI);
}
// A def of a subregister may be a use of the other subregisters, so
@@ -1437,7 +1493,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
MO.setIsUndef(true);
LIS->shrinkToUses(&DstLI);
- return true;
+ return CopyMI;
}
void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
@@ -1539,12 +1595,12 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
MO.substVirtReg(DstReg, SubIdx, *TRI);
}
- DEBUG({
- dbgs() << "\t\tupdated: ";
- if (!UseMI->isDebugValue())
- dbgs() << LIS->getInstructionIndex(*UseMI) << "\t";
- dbgs() << *UseMI;
- });
+ LLVM_DEBUG({
+ dbgs() << "\t\tupdated: ";
+ if (!UseMI->isDebugValue())
+ dbgs() << LIS->getInstructionIndex(*UseMI) << "\t";
+ dbgs() << *UseMI;
+ });
}
}
@@ -1553,7 +1609,7 @@ bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
// reserved register. This doesn't increase register pressure, so it is
// always beneficial.
if (!MRI->isReserved(CP.getDstReg())) {
- DEBUG(dbgs() << "\tCan only merge into reserved registers.\n");
+ LLVM_DEBUG(dbgs() << "\tCan only merge into reserved registers.\n");
return false;
}
@@ -1561,17 +1617,18 @@ bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
if (JoinVInt.containsOneValue())
return true;
- DEBUG(dbgs() << "\tCannot join complex intervals into reserved register.\n");
+ LLVM_DEBUG(
+ dbgs() << "\tCannot join complex intervals into reserved register.\n");
return false;
}
bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
Again = false;
- DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI);
+ LLVM_DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI);
CoalescerPair CP(*TRI);
if (!CP.setRegisters(CopyMI)) {
- DEBUG(dbgs() << "\tNot coalescable.\n");
+ LLVM_DEBUG(dbgs() << "\tNot coalescable.\n");
return false;
}
@@ -1586,7 +1643,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
}
if (!TRI->shouldCoalesce(CopyMI, SrcRC, SrcIdx, DstRC, DstIdx,
CP.getNewRC(), *LIS)) {
- DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n");
+ LLVM_DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n");
return false;
}
}
@@ -1595,16 +1652,21 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
// sometimes dead copies slip through, and we can't generate invalid live
// ranges.
if (!CP.isPhys() && CopyMI->allDefsAreDead()) {
- DEBUG(dbgs() << "\tCopy is dead.\n");
+ LLVM_DEBUG(dbgs() << "\tCopy is dead.\n");
DeadDefs.push_back(CopyMI);
eliminateDeadDefs();
return true;
}
// Eliminate undefs.
- if (!CP.isPhys() && eliminateUndefCopy(CopyMI)) {
- deleteInstr(CopyMI);
- return false; // Not coalescable.
+ if (!CP.isPhys()) {
+ // If this is an IMPLICIT_DEF, leave it alone, but don't try to coalesce.
+ if (MachineInstr *UndefMI = eliminateUndefCopy(CopyMI)) {
+ if (UndefMI->isImplicitDef())
+ return false;
+ deleteInstr(CopyMI);
+ return false; // Not coalescable.
+ }
}
// Coalesced copies are normally removed immediately, but transformations
@@ -1612,7 +1674,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
// When that happens, just join the values and remove the copy.
if (CP.getSrcReg() == CP.getDstReg()) {
LiveInterval &LI = LIS->getInterval(CP.getSrcReg());
- DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n');
+ LLVM_DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n');
const SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
LiveQueryResult LRQ = LI.Query(CopyIdx);
if (VNInfo *DefVNI = LRQ.valueDefined()) {
@@ -1629,7 +1691,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
S.MergeValueNumberInto(SDefVNI, SReadVNI);
}
}
- DEBUG(dbgs() << "\tMerged values: " << LI << '\n');
+ LLVM_DEBUG(dbgs() << "\tMerged values: " << LI << '\n');
}
deleteInstr(CopyMI);
return true;
@@ -1637,9 +1699,9 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
// Enforce policies.
if (CP.isPhys()) {
- DEBUG(dbgs() << "\tConsidering merging " << printReg(CP.getSrcReg(), TRI)
- << " with " << printReg(CP.getDstReg(), TRI, CP.getSrcIdx())
- << '\n');
+ LLVM_DEBUG(dbgs() << "\tConsidering merging "
+ << printReg(CP.getSrcReg(), TRI) << " with "
+ << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n');
if (!canJoinPhys(CP)) {
// Before giving up coalescing, if definition of source is defined by
// trivial computation, try rematerializing it.
@@ -1656,7 +1718,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
LIS->getInterval(CP.getDstReg()).size())
CP.flip();
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "\tConsidering merging to "
<< TRI->getRegClassName(CP.getNewRC()) << " with ";
if (CP.getDstIdx() && CP.getSrcIdx())
@@ -1692,7 +1754,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
if (adjustCopiesBackFrom(CP, CopyMI) ||
removeCopyByCommutingDef(CP, CopyMI)) {
deleteInstr(CopyMI);
- DEBUG(dbgs() << "\tTrivial!\n");
+ LLVM_DEBUG(dbgs() << "\tTrivial!\n");
return true;
}
}
@@ -1704,7 +1766,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
return true;
// Otherwise, we are unable to join the intervals.
- DEBUG(dbgs() << "\tInterference!\n");
+ LLVM_DEBUG(dbgs() << "\tInterference!\n");
Again = true; // May be possible to coalesce later.
return false;
}
@@ -1738,8 +1800,8 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
for (LiveInterval::SubRange &S : LI.subranges()) {
if ((S.LaneMask & ShrinkMask).none())
continue;
- DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask)
- << ")\n");
+ LLVM_DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask)
+ << ")\n");
LIS->shrinkToUses(S, LI.reg);
}
LI.removeEmptySubRanges();
@@ -1756,7 +1818,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
// Update regalloc hint.
TRI->updateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "\tSuccess: " << printReg(CP.getSrcReg(), TRI, CP.getSrcIdx())
<< " -> " << printReg(CP.getDstReg(), TRI, CP.getDstIdx()) << '\n';
dbgs() << "\tResult = ";
@@ -1777,7 +1839,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
assert(CP.isPhys() && "Must be a physreg copy");
assert(MRI->isReserved(DstReg) && "Not a reserved register");
LiveInterval &RHS = LIS->getInterval(SrcReg);
- DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n');
+ LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n');
assert(RHS.containsOneValue() && "Invalid join with reserved register");
@@ -1796,7 +1858,8 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
return false;
}
if (RHS.overlaps(LIS->getRegUnit(*UI))) {
- DEBUG(dbgs() << "\t\tInterference: " << printRegUnit(*UI, TRI) << '\n');
+ LLVM_DEBUG(dbgs() << "\t\tInterference: " << printRegUnit(*UI, TRI)
+ << '\n');
return false;
}
}
@@ -1805,7 +1868,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
BitVector RegMaskUsable;
if (LIS->checkRegMaskInterference(RHS, RegMaskUsable) &&
!RegMaskUsable.test(DstReg)) {
- DEBUG(dbgs() << "\t\tRegMask interference\n");
+ LLVM_DEBUG(dbgs() << "\t\tRegMask interference\n");
return false;
}
}
@@ -1835,12 +1898,12 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
// %y = def
// ...
if (!MRI->hasOneNonDBGUse(SrcReg)) {
- DEBUG(dbgs() << "\t\tMultiple vreg uses!\n");
+ LLVM_DEBUG(dbgs() << "\t\tMultiple vreg uses!\n");
return false;
}
if (!LIS->intervalIsInOneMBB(RHS)) {
- DEBUG(dbgs() << "\t\tComplex control flow!\n");
+ LLVM_DEBUG(dbgs() << "\t\tComplex control flow!\n");
return false;
}
@@ -1858,7 +1921,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) {
MachineInstr *MI = LIS->getInstructionFromIndex(SI);
if (MI->readsRegister(DstReg, TRI)) {
- DEBUG(dbgs() << "\t\tInterference (read): " << *MI);
+ LLVM_DEBUG(dbgs() << "\t\tInterference (read): " << *MI);
return false;
}
}
@@ -1866,8 +1929,8 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
// We're going to remove the copy which defines a physical reserved
// register, so remove its valno, etc.
- DEBUG(dbgs() << "\t\tRemoving phys reg def of " << printReg(DstReg, TRI)
- << " at " << CopyRegIdx << "\n");
+ LLVM_DEBUG(dbgs() << "\t\tRemoving phys reg def of "
+ << printReg(DstReg, TRI) << " at " << CopyRegIdx << "\n");
LIS->removePhysRegDefAt(DstReg, CopyRegIdx);
// Create a new dead def at the new def location.
@@ -2057,6 +2120,13 @@ class JoinVals {
/// True once Pruned above has been computed.
bool PrunedComputed = false;
+ /// True if this value is determined to be identical to OtherVNI
+ /// (in valuesIdentical). This is used with CR_Erase where the erased
+ /// copy is redundant, i.e. the source value is already the same as
+ /// the destination. In such cases the subranges need to be updated
+ /// properly. See comment at pruneSubRegValues for more info.
+ bool Identical = false;
+
Val() = default;
bool isAnalyzed() const { return WriteLanes.any(); }
@@ -2073,7 +2143,7 @@ class JoinVals {
/// Find the ultimate value that VNI was copied from.
std::pair<const VNInfo*,unsigned> followCopyChain(const VNInfo *VNI) const;
- bool valuesIdentical(VNInfo *Val0, VNInfo *Val1, const JoinVals &Other) const;
+ bool valuesIdentical(VNInfo *Value0, VNInfo *Value1, const JoinVals &Other) const;
/// Analyze ValNo in this live range, and set all fields of Vals[ValNo].
/// Return a conflict resolution when possible, but leave the hard cases as
@@ -2191,17 +2261,17 @@ LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain(
const VNInfo *VNI) const {
- unsigned Reg = this->Reg;
+ unsigned TrackReg = Reg;
while (!VNI->isPHIDef()) {
SlotIndex Def = VNI->def;
MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
assert(MI && "No defining instruction");
if (!MI->isFullCopy())
- return std::make_pair(VNI, Reg);
+ return std::make_pair(VNI, TrackReg);
unsigned SrcReg = MI->getOperand(1).getReg();
if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
- return std::make_pair(VNI, Reg);
+ return std::make_pair(VNI, TrackReg);
const LiveInterval &LI = LIS->getInterval(SrcReg);
const VNInfo *ValueIn;
@@ -2210,7 +2280,8 @@ std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain(
LiveQueryResult LRQ = LI.Query(Def);
ValueIn = LRQ.valueIn();
} else {
- // Query subranges. Pick the first matching one.
+ // Query subranges. Ensure that all matching ones take us to the same def
+ // (allowing some of them to be undef).
ValueIn = nullptr;
for (const LiveInterval::SubRange &S : LI.subranges()) {
// Transform lanemask to a mask in the joined live interval.
@@ -2218,16 +2289,27 @@ std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain(
if ((SMask & LaneMask).none())
continue;
LiveQueryResult LRQ = S.Query(Def);
- ValueIn = LRQ.valueIn();
- break;
+ if (!ValueIn) {
+ ValueIn = LRQ.valueIn();
+ continue;
+ }
+ if (LRQ.valueIn() && ValueIn != LRQ.valueIn())
+ return std::make_pair(VNI, TrackReg);
}
}
- if (ValueIn == nullptr)
- break;
+ if (ValueIn == nullptr) {
+ // Reaching an undefined value is legitimate, for example:
+ //
+ // 1 undef %0.sub1 = ... ;; %0.sub0 == undef
+ // 2 %1 = COPY %0 ;; %1 is defined here.
+ // 3 %0 = COPY %1 ;; Now %0.sub0 has a definition,
+ // ;; but it's equivalent to "undef".
+ return std::make_pair(nullptr, SrcReg);
+ }
VNI = ValueIn;
- Reg = SrcReg;
+ TrackReg = SrcReg;
}
- return std::make_pair(VNI, Reg);
+ return std::make_pair(VNI, TrackReg);
}
bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1,
@@ -2235,12 +2317,17 @@ bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1,
const VNInfo *Orig0;
unsigned Reg0;
std::tie(Orig0, Reg0) = followCopyChain(Value0);
- if (Orig0 == Value1)
+ if (Orig0 == Value1 && Reg0 == Other.Reg)
return true;
const VNInfo *Orig1;
unsigned Reg1;
std::tie(Orig1, Reg1) = Other.followCopyChain(Value1);
+ // If both values are undefined, and the source registers are the same
+ // register, the values are identical. Filter out cases where only one
+ // value is defined.
+ if (Orig0 == nullptr || Orig1 == nullptr)
+ return Orig0 == Orig1 && Reg0 == Reg1;
// The values are equal if they are defined at the same place and use the
// same register. Note that we cannot compare VNInfos directly as some of
@@ -2375,9 +2462,10 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
// to erase the IMPLICIT_DEF instruction.
if (OtherV.ErasableImplicitDef && DefMI &&
DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) {
- DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
- << " extends into " << printMBBReference(*DefMI->getParent())
- << ", keeping it.\n");
+ LLVM_DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
+ << " extends into "
+ << printMBBReference(*DefMI->getParent())
+ << ", keeping it.\n");
OtherV.ErasableImplicitDef = false;
}
@@ -2415,9 +2503,11 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
// %other = COPY %ext
// %this = COPY %ext <-- Erase this copy
//
- if (DefMI->isFullCopy() && !CP.isPartial()
- && valuesIdentical(VNI, V.OtherVNI, Other))
+ if (DefMI->isFullCopy() && !CP.isPartial() &&
+ valuesIdentical(VNI, V.OtherVNI, Other)) {
+ V.Identical = true;
return CR_Erase;
+ }
// If the lanes written by this instruction were all undef in OtherVNI, it is
// still safe to join the live ranges. This can't be done with a simple value
@@ -2487,11 +2577,11 @@ void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
assert(V.OtherVNI && "OtherVNI not assigned, can't merge.");
assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion");
Assignments[ValNo] = Other.Assignments[V.OtherVNI->id];
- DEBUG(dbgs() << "\t\tmerge " << printReg(Reg) << ':' << ValNo << '@'
- << LR.getValNumInfo(ValNo)->def << " into "
- << printReg(Other.Reg) << ':' << V.OtherVNI->id << '@'
- << V.OtherVNI->def << " --> @"
- << NewVNInfo[Assignments[ValNo]]->def << '\n');
+ LLVM_DEBUG(dbgs() << "\t\tmerge " << printReg(Reg) << ':' << ValNo << '@'
+ << LR.getValNumInfo(ValNo)->def << " into "
+ << printReg(Other.Reg) << ':' << V.OtherVNI->id << '@'
+ << V.OtherVNI->def << " --> @"
+ << NewVNInfo[Assignments[ValNo]]->def << '\n');
break;
case CR_Replace:
case CR_Unresolved: {
@@ -2517,8 +2607,8 @@ bool JoinVals::mapValues(JoinVals &Other) {
for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
computeAssignment(i, Other);
if (Vals[i].Resolution == CR_Impossible) {
- DEBUG(dbgs() << "\t\tinterference at " << printReg(Reg) << ':' << i
- << '@' << LR.getValNumInfo(i)->def << '\n');
+ LLVM_DEBUG(dbgs() << "\t\tinterference at " << printReg(Reg) << ':' << i
+ << '@' << LR.getValNumInfo(i)->def << '\n');
return false;
}
}
@@ -2540,13 +2630,13 @@ taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
// lanes escape the block.
SlotIndex End = OtherI->end;
if (End >= MBBEnd) {
- DEBUG(dbgs() << "\t\ttaints global " << printReg(Other.Reg) << ':'
- << OtherI->valno->id << '@' << OtherI->start << '\n');
+ LLVM_DEBUG(dbgs() << "\t\ttaints global " << printReg(Other.Reg) << ':'
+ << OtherI->valno->id << '@' << OtherI->start << '\n');
return false;
}
- DEBUG(dbgs() << "\t\ttaints local " << printReg(Other.Reg) << ':'
- << OtherI->valno->id << '@' << OtherI->start
- << " to " << End << '\n');
+ LLVM_DEBUG(dbgs() << "\t\ttaints local " << printReg(Other.Reg) << ':'
+ << OtherI->valno->id << '@' << OtherI->start << " to "
+ << End << '\n');
// A dead def is not a problem.
if (End.isDead())
break;
@@ -2567,7 +2657,7 @@ taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
bool JoinVals::usesLanes(const MachineInstr &MI, unsigned Reg, unsigned SubIdx,
LaneBitmask Lanes) const {
- if (MI.isDebugValue())
+ if (MI.isDebugInstr())
return false;
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || MO.isDef() || MO.getReg() != Reg)
@@ -2587,8 +2677,8 @@ bool JoinVals::resolveConflicts(JoinVals &Other) {
assert(V.Resolution != CR_Impossible && "Unresolvable conflict");
if (V.Resolution != CR_Unresolved)
continue;
- DEBUG(dbgs() << "\t\tconflict at " << printReg(Reg) << ':' << i
- << '@' << LR.getValNumInfo(i)->def << '\n');
+ LLVM_DEBUG(dbgs() << "\t\tconflict at " << printReg(Reg) << ':' << i << '@'
+ << LR.getValNumInfo(i)->def << '\n');
if (SubRangeJoin)
return false;
@@ -2625,7 +2715,7 @@ bool JoinVals::resolveConflicts(JoinVals &Other) {
while (true) {
assert(MI != MBB->end() && "Bad LastMI");
if (usesLanes(*MI, Other.Reg, Other.SubIdx, TaintedLanes)) {
- DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI);
+ LLVM_DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI);
return false;
}
// LastMI is the last instruction to use the current value.
@@ -2698,8 +2788,8 @@ void JoinVals::pruneValues(JoinVals &Other,
if (!EraseImpDef)
EndPoints.push_back(Def);
}
- DEBUG(dbgs() << "\t\tpruned " << printReg(Other.Reg) << " at " << Def
- << ": " << Other.LR << '\n');
+ LLVM_DEBUG(dbgs() << "\t\tpruned " << printReg(Other.Reg) << " at " << Def
+ << ": " << Other.LR << '\n');
break;
}
case CR_Erase:
@@ -2710,8 +2800,8 @@ void JoinVals::pruneValues(JoinVals &Other,
// computeAssignment(), the value that was originally copied could have
// been replaced.
LIS->pruneValue(LR, Def, &EndPoints);
- DEBUG(dbgs() << "\t\tpruned all of " << printReg(Reg) << " at "
- << Def << ": " << LR << '\n');
+ LLVM_DEBUG(dbgs() << "\t\tpruned all of " << printReg(Reg) << " at "
+ << Def << ": " << LR << '\n');
}
break;
case CR_Unresolved:
@@ -2721,21 +2811,65 @@ void JoinVals::pruneValues(JoinVals &Other,
}
}
+/// Consider the following situation when coalescing the copy between
+/// %31 and %45 at 800. (The vertical lines represent live range segments.)
+///
+/// Main range Subrange 0004 (sub2)
+/// %31 %45 %31 %45
+/// 544 %45 = COPY %28 + +
+/// | v1 | v1
+/// 560B bb.1: + +
+/// 624 = %45.sub2 | v2 | v2
+/// 800 %31 = COPY %45 + + + +
+/// | v0 | v0
+/// 816 %31.sub1 = ... + |
+/// 880 %30 = COPY %31 | v1 +
+/// 928 %45 = COPY %30 | + +
+/// | | v0 | v0 <--+
+/// 992B ; backedge -> bb.1 | + + |
+/// 1040 = %31.sub0 + |
+/// This value must remain
+/// live-out!
+///
+/// Assuming that %31 is coalesced into %45, the copy at 928 becomes
+/// redundant, since it copies the value from %45 back into it. The
+/// conflict resolution for the main range determines that %45.v0 is
+/// to be erased, which is ok since %31.v1 is identical to it.
+/// The problem happens with the subrange for sub2: it has to be live
+/// on exit from the block, but since 928 was actually a point of
+/// definition of %45.sub2, %45.sub2 was not live immediately prior
+/// to that definition. As a result, when 928 was erased, the value v0
+/// for %45.sub2 was pruned in pruneSubRegValues. Consequently, an
+/// IMPLICIT_DEF was inserted as a "backedge" definition for %45.sub2,
+/// providing an incorrect value to the use at 624.
+///
+/// Since the main-range values %31.v1 and %45.v0 were proved to be
+/// identical, the corresponding values in subranges must also be the
+/// same. A redundant copy is removed because it's not needed, and not
+/// because it copied an undefined value, so any liveness that originated
+/// from that copy cannot disappear. When pruning a value that started
+/// at the removed copy, the corresponding identical value must be
+/// extended to replace it.
void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) {
// Look for values being erased.
bool DidPrune = false;
for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
+ Val &V = Vals[i];
// We should trigger in all cases in which eraseInstrs() does something.
// match what eraseInstrs() is doing, print a message so
- if (Vals[i].Resolution != CR_Erase &&
- (Vals[i].Resolution != CR_Keep || !Vals[i].ErasableImplicitDef ||
- !Vals[i].Pruned))
+ if (V.Resolution != CR_Erase &&
+ (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned))
continue;
// Check subranges at the point where the copy will be removed.
SlotIndex Def = LR.getValNumInfo(i)->def;
+ SlotIndex OtherDef;
+ if (V.Identical)
+ OtherDef = V.OtherVNI->def;
+
// Print message so mismatches with eraseInstrs() can be diagnosed.
- DEBUG(dbgs() << "\t\tExpecting instruction removal at " << Def << '\n');
+ LLVM_DEBUG(dbgs() << "\t\tExpecting instruction removal at " << Def
+ << '\n');
for (LiveInterval::SubRange &S : LI.subranges()) {
LiveQueryResult Q = S.Query(Def);
@@ -2743,19 +2877,28 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) {
// copied and we must remove that subrange value as well.
VNInfo *ValueOut = Q.valueOutOrDead();
if (ValueOut != nullptr && Q.valueIn() == nullptr) {
- DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask)
- << " at " << Def << "\n");
- LIS->pruneValue(S, Def, nullptr);
+ LLVM_DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask)
+ << " at " << Def << "\n");
+ SmallVector<SlotIndex,8> EndPoints;
+ LIS->pruneValue(S, Def, &EndPoints);
DidPrune = true;
// Mark value number as unused.
ValueOut->markUnused();
+
+ if (V.Identical && S.Query(OtherDef).valueOut()) {
+ // If V is identical to V.OtherVNI (and S was live at OtherDef),
+ // then we can't simply prune V from S. V needs to be replaced
+ // with V.OtherVNI.
+ LIS->extendToIndices(S, EndPoints);
+ }
continue;
}
// If a subrange ends at the copy, then a value was copied but only
// partially used later. Shrink the subregister range appropriately.
if (Q.valueIn() != nullptr && Q.valueOut() == nullptr) {
- DEBUG(dbgs() << "\t\tDead uses at sublane " << PrintLaneMask(S.LaneMask)
- << " at " << Def << "\n");
+ LLVM_DEBUG(dbgs() << "\t\tDead uses at sublane "
+ << PrintLaneMask(S.LaneMask) << " at " << Def
+ << "\n");
ShrinkMask |= S.LaneMask;
}
}
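The long comment introduced before pruneSubRegValues motivates the prune-then-extend step above: when the erased copy carried a value identical to the one in the other register, the pruned subrange liveness has to be restored from the recorded end points instead of disappearing. A compact sketch of that pattern, with the surrounding pass state (LIS, the subrange, the two defs, the Identical flag) taken as assumptions:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/LiveIntervals.h"

    static void pruneButKeepIdenticalValue(llvm::LiveIntervals &LIS,
                                           llvm::LiveRange &S,
                                           llvm::SlotIndex Def,
                                           llvm::SlotIndex OtherDef,
                                           bool Identical) {
      llvm::SmallVector<llvm::SlotIndex, 8> EndPoints;
      LIS.pruneValue(S, Def, &EndPoints); // Remember where liveness was cut.
      if (Identical && S.Query(OtherDef).valueOut())
        LIS.extendToIndices(S, EndPoints); // Re-cover with the identical value.
    }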
@@ -2867,7 +3010,7 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
std::prev(S)->end = NewEnd;
}
}
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n';
if (LI != nullptr)
dbgs() << "\t\t LHS = " << *LI << '\n';
@@ -2885,7 +3028,7 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
ShrinkRegs.push_back(Reg);
}
ErasedInstrs.insert(MI);
- DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI);
+ LLVM_DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI);
LIS->RemoveMachineInstrFromMaps(*MI);
MI->eraseFromParent();
break;
@@ -2940,13 +3083,14 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
LRange.join(RRange, LHSVals.getAssignments(), RHSVals.getAssignments(),
NewVNInfo);
- DEBUG(dbgs() << "\t\tjoined lanes: " << LRange << "\n");
+ LLVM_DEBUG(dbgs() << "\t\tjoined lanes: " << PrintLaneMask(LaneMask)
+ << ' ' << LRange << "\n");
if (EndPoints.empty())
return;
// Recompute the parts of the live range we had to remove because of
// CR_Replace conflicts.
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
dbgs() << EndPoints[i];
@@ -2985,9 +3129,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), LaneBitmask::getNone(),
NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);
- DEBUG(dbgs() << "\t\tRHS = " << RHS
- << "\n\t\tLHS = " << LHS
- << '\n');
+ LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << "\n\t\tLHS = " << LHS << '\n');
// First compute NewVNInfo and the simple value mappings.
// Detect impossible conflicts early.
@@ -3018,8 +3160,8 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
R.LaneMask = Mask;
}
}
- DEBUG(dbgs() << "\t\tLHST = " << printReg(CP.getDstReg())
- << ' ' << LHS << '\n');
+ LLVM_DEBUG(dbgs() << "\t\tLHST = " << printReg(CP.getDstReg()) << ' ' << LHS
+ << '\n');
// Determine lanemasks of RHS in the coalesced register and merge subranges.
unsigned SrcIdx = CP.getSrcIdx();
@@ -3034,7 +3176,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
mergeSubRangeInto(LHS, R, Mask, CP);
}
}
- DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
+ LLVM_DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
// Pruning implicit defs from subranges may result in the main range
// having stale segments.
@@ -3072,7 +3214,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
if (!EndPoints.empty()) {
// Recompute the parts of the live range we had to remove because of
// CR_Replace conflicts.
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
dbgs() << EndPoints[i];
@@ -3220,7 +3362,8 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
continue;
// Check that OtherReg interfere with DstReg.
if (LIS->getInterval(OtherReg).overlaps(DstLI)) {
- DEBUG(dbgs() << "Apply terminal rule for: " << printReg(DstReg) << '\n');
+ LLVM_DEBUG(dbgs() << "Apply terminal rule for: " << printReg(DstReg)
+ << '\n');
return true;
}
}
@@ -3229,7 +3372,7 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
void
RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
- DEBUG(dbgs() << MBB->getName() << ":\n");
+ LLVM_DEBUG(dbgs() << MBB->getName() << ":\n");
// Collect all copy-like instructions in MBB. Don't start coalescing anything
// yet, it might invalidate the iterator.
@@ -3294,7 +3437,7 @@ void RegisterCoalescer::coalesceLocals() {
}
void RegisterCoalescer::joinAllIntervals() {
- DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
+ LLVM_DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
assert(WorkList.empty() && LocalWorkList.empty() && "Old data still around.");
std::vector<MBBPriorityInfo> MBBs;
@@ -3350,8 +3493,8 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
// splitting optimization.
JoinSplitEdges = EnableJoinSplits;
- DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
- << "********** Function: " << MF->getName() << '\n');
+ LLVM_DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
+ << "********** Function: " << MF->getName() << '\n');
if (VerifyCoalescing)
MF->verify(this, "Before register coalescing");
@@ -3368,14 +3511,15 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
array_pod_sort(InflateRegs.begin(), InflateRegs.end());
InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()),
InflateRegs.end());
- DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size() << " regs.\n");
+ LLVM_DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size()
+ << " regs.\n");
for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) {
unsigned Reg = InflateRegs[i];
if (MRI->reg_nodbg_empty(Reg))
continue;
if (MRI->recomputeRegClass(Reg)) {
- DEBUG(dbgs() << printReg(Reg) << " inflated to "
- << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n');
+ LLVM_DEBUG(dbgs() << printReg(Reg) << " inflated to "
+ << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n');
++NumInflated;
LiveInterval &LI = LIS->getInterval(Reg);
@@ -3398,7 +3542,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
}
}
- DEBUG(dump());
+ LLVM_DEBUG(dump());
if (VerifyCoalescing)
MF->verify(this, "After register coalescing");
return true;
diff --git a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
index 9ac810c7c723..51414de518fd 100644
--- a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -28,6 +28,7 @@
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Compiler.h"
@@ -587,7 +588,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS,
for (auto I = Defs.begin(); I != Defs.end(); ) {
LaneBitmask LiveAfter = getLiveLanesAt(LIS, MRI, true, I->RegUnit,
Pos.getDeadSlot());
- // If the the def is all that is live after the instruction, then in case
+ // If the def is all that is live after the instruction, then in case
// of a subregister def we need a read-undef flag.
unsigned RegUnit = I->RegUnit;
if (TargetRegisterInfo::isVirtualRegister(RegUnit) &&
@@ -635,7 +636,7 @@ void PressureDiffs::init(unsigned N) {
}
Max = Size;
free(PDiffArray);
- PDiffArray = reinterpret_cast<PressureDiff*>(calloc(N, sizeof(PressureDiff)));
+ PDiffArray = static_cast<PressureDiff*>(safe_calloc(N, sizeof(PressureDiff)));
}
void PressureDiffs::addInstruction(unsigned Idx,
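Besides the debug-macro rename, this hunk swaps a raw calloc for llvm::safe_calloc, which reports a fatal error on allocation failure instead of returning null. A small usage sketch, assuming llvm/Support/MemAlloc.h as present in this tree; the struct is a stand-in for PressureDiff:

    #include "llvm/Support/MemAlloc.h"
    #include <cstdlib>

    struct PressureDiffLike { int Entries[8]; };

    static PressureDiffLike *allocateArray(unsigned N) {
      // Zero-initialized like calloc; never returns null, so no check needed.
      return static_cast<PressureDiffLike *>(
          llvm::safe_calloc(N, sizeof(PressureDiffLike)));
    }

    static void releaseArray(PressureDiffLike *P) {
      std::free(P); // Still C-allocator memory, so free() is the right release.
    }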
@@ -747,7 +748,7 @@ void RegPressureTracker::bumpDeadDefs(ArrayRef<RegisterMaskPair> DeadDefs) {
/// instruction independent of liveness.
void RegPressureTracker::recede(const RegisterOperands &RegOpers,
SmallVectorImpl<RegisterMaskPair> *LiveUses) {
- assert(!CurrPos->isDebugValue());
+ assert(!CurrPos->isDebugInstr());
// Boost pressure for all dead defs together.
bumpDeadDefs(RegOpers.DeadDefs);
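The isDebugValue() to isDebugInstr() changes in this file (and in RegisterScavenging.cpp below) widen the "skip debug instructions" checks so they cover every debug-only opcode rather than just DBG_VALUE. An illustrative loop under that assumption:

    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineInstr.h"

    // Count only instructions that generate code; debug-only instructions
    // (DBG_VALUE and friends) are ignored.
    static unsigned countRealInstructions(const llvm::MachineBasicBlock &MBB) {
      unsigned N = 0;
      for (const llvm::MachineInstr &MI : MBB)
        if (!MI.isDebugInstr())
          ++N;
      return N;
    }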
@@ -1018,7 +1019,7 @@ static void computeMaxPressureDelta(ArrayRef<unsigned> OldMaxPressureVec,
/// This is intended for speculative queries. It leaves pressure inconsistent
/// with the current position, so must be restored by the caller.
void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) {
- assert(!MI->isDebugValue() && "Expect a nondebug instruction.");
+ assert(!MI->isDebugInstr() && "Expect a nondebug instruction.");
SlotIndex SlotIdx;
if (RequireIntervals)
@@ -1259,7 +1260,7 @@ LaneBitmask RegPressureTracker::getLiveThroughAt(unsigned RegUnit,
/// This is intended for speculative queries. It leaves pressure inconsistent
/// with the current position, so must be restored by the caller.
void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
- assert(!MI->isDebugValue() && "Expect a nondebug instruction.");
+ assert(!MI->isDebugInstr() && "Expect a nondebug instruction.");
SlotIndex SlotIdx;
if (RequireIntervals)
diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
index 97967124add6..a878c34f9aa4 100644
--- a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -111,7 +111,7 @@ void RegScavenger::determineKillsAndDefs() {
assert(Tracking && "Must be tracking to determine kills and defs");
MachineInstr &MI = *MBBI;
- assert(!MI.isDebugValue() && "Debug values have no kills or defs");
+ assert(!MI.isDebugInstr() && "Debug values have no kills or defs");
// Find out which registers are early clobbered, killed, defined, and marked
// def-dead in this instruction.
@@ -158,12 +158,12 @@ void RegScavenger::unprocess() {
assert(Tracking && "Cannot unprocess because we're not tracking");
MachineInstr &MI = *MBBI;
- if (!MI.isDebugValue()) {
+ if (!MI.isDebugInstr()) {
determineKillsAndDefs();
// Commit the changes.
- setUsed(KillRegUnits);
setUnused(DefRegUnits);
+ setUsed(KillRegUnits);
}
if (MBBI == MBB->begin()) {
@@ -195,7 +195,7 @@ void RegScavenger::forward() {
I->Restore = nullptr;
}
- if (MI.isDebugValue())
+ if (MI.isDebugInstr())
return;
determineKillsAndDefs();
@@ -288,8 +288,8 @@ bool RegScavenger::isRegUsed(unsigned Reg, bool includeReserved) const {
unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
for (unsigned Reg : *RC) {
if (!isRegUsed(Reg)) {
- DEBUG(dbgs() << "Scavenger found unused reg: " << printReg(Reg, TRI)
- << "\n");
+ LLVM_DEBUG(dbgs() << "Scavenger found unused reg: " << printReg(Reg, TRI)
+ << "\n");
return Reg;
}
}
@@ -318,7 +318,7 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
bool inVirtLiveRange = false;
for (++MI; InstrLimit > 0 && MI != ME; ++MI, --InstrLimit) {
- if (MI->isDebugValue()) {
+ if (MI->isDebugInstr()) {
++InstrLimit; // Don't count debug instructions
continue;
}
@@ -561,15 +561,15 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
// If we found an unused register there is no reason to spill it.
if (!isRegUsed(SReg)) {
- DEBUG(dbgs() << "Scavenged register: " << printReg(SReg, TRI) << "\n");
+ LLVM_DEBUG(dbgs() << "Scavenged register: " << printReg(SReg, TRI) << "\n");
return SReg;
}
ScavengedInfo &Scavenged = spill(SReg, *RC, SPAdj, I, UseMI);
Scavenged.Restore = &*std::prev(UseMI);
- DEBUG(dbgs() << "Scavenged register (with spill): " << printReg(SReg, TRI)
- << "\n");
+ LLVM_DEBUG(dbgs() << "Scavenged register (with spill): "
+ << printReg(SReg, TRI) << "\n");
return SReg;
}
@@ -594,14 +594,15 @@ unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
MachineBasicBlock::iterator ReloadAfter =
RestoreAfter ? std::next(MBBI) : MBBI;
MachineBasicBlock::iterator ReloadBefore = std::next(ReloadAfter);
- DEBUG(dbgs() << "Reload before: " << *ReloadBefore << '\n');
+ LLVM_DEBUG(dbgs() << "Reload before: " << *ReloadBefore << '\n');
ScavengedInfo &Scavenged = spill(Reg, RC, SPAdj, SpillBefore, ReloadBefore);
Scavenged.Restore = &*std::prev(SpillBefore);
LiveUnits.removeReg(Reg);
- DEBUG(dbgs() << "Scavenged register with spill: " << printReg(Reg, TRI)
- << " until " << *SpillBefore);
+ LLVM_DEBUG(dbgs() << "Scavenged register with spill: " << printReg(Reg, TRI)
+ << " until " << *SpillBefore);
} else {
- DEBUG(dbgs() << "Scavenged free register: " << printReg(Reg, TRI) << '\n');
+ LLVM_DEBUG(dbgs() << "Scavenged free register: " << printReg(Reg, TRI)
+ << '\n');
}
return Reg;
}
@@ -757,8 +758,8 @@ void llvm::scavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger &RS) {
bool Again = scavengeFrameVirtualRegsInBlock(MRI, RS, MBB);
if (Again) {
- DEBUG(dbgs() << "Warning: Required two scavenging passes for block "
- << MBB.getName() << '\n');
+ LLVM_DEBUG(dbgs() << "Warning: Required two scavenging passes for block "
+ << MBB.getName() << '\n');
Again = scavengeFrameVirtualRegsInBlock(MRI, RS, MBB);
// The target required a 2nd run (because it created new vregs while
// spilling). Refuse to do another pass to keep compiletime in check.
diff --git a/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp
index 4e42deb406e1..6a31118cc562 100644
--- a/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp
@@ -31,8 +31,6 @@
using namespace llvm;
-#define DEBUG_TYPE "ip-regalloc"
-
static cl::opt<bool> DumpRegUsage(
"print-regusage", cl::init(false), cl::Hidden,
cl::desc("print register usage details collected for analysis."));
@@ -42,7 +40,9 @@ INITIALIZE_PASS(PhysicalRegisterUsageInfo, "reg-usage-info",
char PhysicalRegisterUsageInfo::ID = 0;
-void PhysicalRegisterUsageInfo::anchor() {}
+void PhysicalRegisterUsageInfo::setTargetMachine(const TargetMachine &TM) {
+ this->TM = &TM;
+}
bool PhysicalRegisterUsageInfo::doInitialization(Module &M) {
RegMasks.grow(M.size());
@@ -58,22 +58,19 @@ bool PhysicalRegisterUsageInfo::doFinalization(Module &M) {
}
void PhysicalRegisterUsageInfo::storeUpdateRegUsageInfo(
- const Function *FP, std::vector<uint32_t> RegMask) {
- assert(FP != nullptr && "Function * can't be nullptr.");
- RegMasks[FP] = std::move(RegMask);
+ const Function &FP, ArrayRef<uint32_t> RegMask) {
+ RegMasks[&FP] = RegMask;
}
-const std::vector<uint32_t> *
-PhysicalRegisterUsageInfo::getRegUsageInfo(const Function *FP) {
- auto It = RegMasks.find(FP);
+ArrayRef<uint32_t>
+PhysicalRegisterUsageInfo::getRegUsageInfo(const Function &FP) {
+ auto It = RegMasks.find(&FP);
if (It != RegMasks.end())
- return &(It->second);
- return nullptr;
+ return makeArrayRef<uint32_t>(It->second);
+ return ArrayRef<uint32_t>();
}
void PhysicalRegisterUsageInfo::print(raw_ostream &OS, const Module *M) const {
- const TargetRegisterInfo *TRI;
-
using FuncPtrRegMaskPair = std::pair<const Function *, std::vector<uint32_t>>;
SmallVector<const FuncPtrRegMaskPair *, 64> FPRMPairVector;
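The RegisterUsageInfo interface change above returns an ArrayRef<uint32_t> instead of a pointer to the stored vector, so a lookup miss becomes an empty view rather than a null pointer. A self-contained sketch of that accessor shape; the container and key type here are placeholders, not the pass's own:

    #include "llvm/ADT/ArrayRef.h"
    #include <cstdint>
    #include <map>
    #include <string>
    #include <vector>

    struct RegUsageTable {
      std::map<std::string, std::vector<uint32_t>> Masks;

      // Non-owning view of the stored vector; empty ArrayRef on a miss.
      llvm::ArrayRef<uint32_t> lookup(const std::string &Name) const {
        auto It = Masks.find(Name);
        if (It != Masks.end())
          return llvm::makeArrayRef(It->second);
        return llvm::ArrayRef<uint32_t>();
      }
    };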
@@ -83,7 +80,7 @@ void PhysicalRegisterUsageInfo::print(raw_ostream &OS, const Module *M) const {
FPRMPairVector.push_back(&RegMask);
// sort the vector to print analysis in alphabatic order of function name.
- std::sort(
+ llvm::sort(
FPRMPairVector.begin(), FPRMPairVector.end(),
[](const FuncPtrRegMaskPair *A, const FuncPtrRegMaskPair *B) -> bool {
return A->first->getName() < B->first->getName();
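In the hunk above, std::sort becomes llvm::sort from llvm/ADT/STLExtras.h: same interface, but in expensive-checks builds the range is shuffled first so comparators that depend on the incoming element order get caught. A hedged usage sketch with an invented value type:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/ADT/StringRef.h"
    #include <utility>

    static void sortByName(
        llvm::SmallVectorImpl<std::pair<llvm::StringRef, unsigned>> &V) {
      llvm::sort(V.begin(), V.end(),
                 [](const std::pair<llvm::StringRef, unsigned> &A,
                    const std::pair<llvm::StringRef, unsigned> &B) {
                   return A.first < B.first; // Deterministic key: the name.
                 });
    }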
@@ -92,8 +89,9 @@ void PhysicalRegisterUsageInfo::print(raw_ostream &OS, const Module *M) const {
for (const FuncPtrRegMaskPair *FPRMPair : FPRMPairVector) {
OS << FPRMPair->first->getName() << " "
<< "Clobbered Registers: ";
- TRI = TM->getSubtarget<TargetSubtargetInfo>(*(FPRMPair->first))
- .getRegisterInfo();
+ const TargetRegisterInfo *TRI
+ = TM->getSubtarget<TargetSubtargetInfo>(*(FPRMPair->first))
+ .getRegisterInfo();
for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
if (MachineOperand::clobbersPhysReg(&(FPRMPair->second[0]), PReg))
diff --git a/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
index 1e1f36a35ecc..156d1c81c238 100644
--- a/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
+++ b/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
@@ -77,20 +77,20 @@ private:
/// Split unrelated subregister components and rename them to new vregs.
bool renameComponents(LiveInterval &LI) const;
- /// \brief Build a vector of SubRange infos and a union find set of
+ /// Build a vector of SubRange infos and a union find set of
/// equivalence classes.
/// Returns true if more than 1 equivalence class was found.
bool findComponents(IntEqClasses &Classes,
SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
LiveInterval &LI) const;
- /// \brief Distribute the LiveInterval segments into the new LiveIntervals
+ /// Distribute the LiveInterval segments into the new LiveIntervals
/// belonging to their class.
void distribute(const IntEqClasses &Classes,
const SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
const SmallVectorImpl<LiveInterval*> &Intervals) const;
- /// \brief Constructs main liverange and add missing undef+dead flags.
+ /// Constructs main liverange and add missing undef+dead flags.
void computeMainRangesFixFlags(const IntEqClasses &Classes,
const SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
const SmallVectorImpl<LiveInterval*> &Intervals) const;
@@ -134,17 +134,17 @@ bool RenameIndependentSubregs::renameComponents(LiveInterval &LI) const {
const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);
SmallVector<LiveInterval*, 4> Intervals;
Intervals.push_back(&LI);
- DEBUG(dbgs() << printReg(Reg) << ": Found " << Classes.getNumClasses()
- << " equivalence classes.\n");
- DEBUG(dbgs() << printReg(Reg) << ": Splitting into newly created:");
+ LLVM_DEBUG(dbgs() << printReg(Reg) << ": Found " << Classes.getNumClasses()
+ << " equivalence classes.\n");
+ LLVM_DEBUG(dbgs() << printReg(Reg) << ": Splitting into newly created:");
for (unsigned I = 1, NumClasses = Classes.getNumClasses(); I < NumClasses;
++I) {
unsigned NewVReg = MRI->createVirtualRegister(RegClass);
LiveInterval &NewLI = LIS->createEmptyInterval(NewVReg);
Intervals.push_back(&NewLI);
- DEBUG(dbgs() << ' ' << printReg(NewVReg));
+ LLVM_DEBUG(dbgs() << ' ' << printReg(NewVReg));
}
- DEBUG(dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << '\n');
rewriteOperands(Classes, SubRangeInfos, Intervals);
distribute(Classes, SubRangeInfos, Intervals);
@@ -219,7 +219,8 @@ void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes,
if (!MO.isDef() && !MO.readsReg())
continue;
- SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent());
+ auto *MI = MO.getParent();
+ SlotIndex Pos = LIS->getInstructionIndex(*MI);
Pos = MO.isDef() ? Pos.getRegSlot(MO.isEarlyClobber())
: Pos.getBaseIndex();
unsigned SubRegIdx = MO.getSubReg();
@@ -245,11 +246,14 @@ void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes,
MO.setReg(VReg);
if (MO.isTied() && Reg != VReg) {
- /// Undef use operands are not tracked in the equivalence class but need
- /// to be update if they are tied.
- MO.getParent()->substituteRegister(Reg, VReg, 0, TRI);
-
- // substituteRegister breaks the iterator, so restart.
+ /// Undef use operands are not tracked in the equivalence class,
+ /// but need to be updated if they are tied; take care to only
+ /// update the tied operand.
+ unsigned OperandNo = MI->getOperandNo(&MO);
+ unsigned TiedIdx = MI->findTiedOperandIdx(OperandNo);
+ MI->getOperand(TiedIdx).setReg(VReg);
+
+ // above substitution breaks the iterator, so restart.
I = MRI->reg_nodbg_begin(Reg);
}
}
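The rewrite above replaces the whole-instruction substituteRegister() call with a targeted update of only the operand tied to MO. The same pattern in isolation, assuming MO belongs to MI and is known to be tied (otherwise findTiedOperandIdx would assert):

    #include "llvm/CodeGen/MachineInstr.h"

    static void rewriteTiedOperand(llvm::MachineInstr &MI,
                                   llvm::MachineOperand &MO, unsigned NewReg) {
      unsigned OperandNo = MI.getOperandNo(&MO);      // Index of MO within MI.
      unsigned TiedIdx = MI.findTiedOperandIdx(OperandNo);
      MI.getOperand(TiedIdx).setReg(NewReg);          // Update only the tied operand.
    }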
@@ -376,8 +380,8 @@ bool RenameIndependentSubregs::runOnMachineFunction(MachineFunction &MF) {
if (!MRI->subRegLivenessEnabled())
return false;
- DEBUG(dbgs() << "Renaming independent subregister live ranges in "
- << MF.getName() << '\n');
+ LLVM_DEBUG(dbgs() << "Renaming independent subregister live ranges in "
+ << MF.getName() << '\n');
LIS = &getAnalysis<LiveIntervals>();
TII = MF.getSubtarget().getInstrInfo();
diff --git a/contrib/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp b/contrib/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp
index f1885aa74285..a02302e6ff99 100644
--- a/contrib/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp
+++ b/contrib/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp
@@ -13,9 +13,12 @@
/// happen is that the MachineFunction has the FailedISel property.
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/StackProtector.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/Support/Debug.h"
@@ -42,12 +45,23 @@ namespace {
StringRef getPassName() const override { return "ResetMachineFunction"; }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<StackProtector>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
bool runOnMachineFunction(MachineFunction &MF) override {
+ // No matter what happened, whether we successfully selected the function
+ // or not, nothing is going to use the vreg types after us. Make sure they
+ // disappear.
+ auto ClearVRegTypesOnReturn =
+ make_scope_exit([&MF]() { MF.getRegInfo().clearVirtRegTypes(); });
+
if (MF.getProperties().hasProperty(
MachineFunctionProperties::Property::FailedISel)) {
if (AbortOnFailedISel)
report_fatal_error("Instruction selection failed");
- DEBUG(dbgs() << "Reseting: " << MF.getName() << '\n');
+ LLVM_DEBUG(dbgs() << "Resetting: " << MF.getName() << '\n');
++NumFunctionsReset;
MF.reset();
if (EmitFallbackDiag) {
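The new scope guard above relies on llvm::make_scope_exit from llvm/ADT/ScopeExit.h, which runs its callable on every path out of the enclosing scope. A minimal, self-contained sketch of the idiom:

    #include "llvm/ADT/ScopeExit.h"
    #include "llvm/Support/raw_ostream.h"

    static bool processWithCleanup(bool Fail) {
      auto Cleanup = llvm::make_scope_exit(
          [] { llvm::errs() << "cleanup ran\n"; }); // Executes on scope exit.
      if (Fail)
        return false; // Cleanup still runs here.
      return true;    // ...and here.
    }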
@@ -65,7 +79,7 @@ namespace {
char ResetMachineFunction::ID = 0;
INITIALIZE_PASS(ResetMachineFunction, DEBUG_TYPE,
- "reset machine function if ISel failed", false, false)
+ "Reset machine function if ISel failed", false, false)
MachineFunctionPass *
llvm::createResetMachineFunctionPass(bool EmitFallbackDiag = false,
diff --git a/contrib/llvm/lib/CodeGen/SafeStack.cpp b/contrib/llvm/lib/CodeGen/SafeStack.cpp
index 51233be521be..cbbbf7c385aa 100644
--- a/contrib/llvm/lib/CodeGen/SafeStack.cpp
+++ b/contrib/llvm/lib/CodeGen/SafeStack.cpp
@@ -24,10 +24,12 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -61,7 +63,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/Cloning.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -88,6 +90,13 @@ STATISTIC(NumUnsafeStackRestorePoints, "Number of setjmps and landingpads");
} // namespace llvm
+/// Use __safestack_pointer_address even if the platform has a faster way of
+/// access safe stack pointer.
+static cl::opt<bool>
+ SafeStackUsePointerAddress("safestack-use-pointer-address",
+ cl::init(false), cl::Hidden);
+
+
namespace {
/// Rewrite an SCEV expression for a memory access address to an expression that
@@ -134,14 +143,14 @@ class SafeStack {
/// might expect to appear on the stack on most common targets.
enum { StackAlignment = 16 };
- /// \brief Return the value of the stack canary.
+ /// Return the value of the stack canary.
Value *getStackGuard(IRBuilder<> &IRB, Function &F);
- /// \brief Load stack guard from the frame and check if it has changed.
+ /// Load stack guard from the frame and check if it has changed.
void checkStackGuard(IRBuilder<> &IRB, Function &F, ReturnInst &RI,
AllocaInst *StackGuardSlot, Value *StackGuard);
- /// \brief Find all static allocas, dynamic allocas, return instructions and
+ /// Find all static allocas, dynamic allocas, return instructions and
/// stack restore points (exception unwind blocks and setjmp calls) in the
/// given function and append them to the respective vectors.
void findInsts(Function &F, SmallVectorImpl<AllocaInst *> &StaticAllocas,
@@ -150,11 +159,11 @@ class SafeStack {
SmallVectorImpl<ReturnInst *> &Returns,
SmallVectorImpl<Instruction *> &StackRestorePoints);
- /// \brief Calculate the allocation size of a given alloca. Returns 0 if the
+ /// Calculate the allocation size of a given alloca. Returns 0 if the
/// size can not be statically determined.
uint64_t getStaticAllocaAllocationSize(const AllocaInst* AI);
- /// \brief Allocate space for all static allocas in \p StaticAllocas,
+ /// Allocate space for all static allocas in \p StaticAllocas,
/// replace allocas with pointers into the unsafe stack and generate code to
/// restore the stack pointer before all return instructions in \p Returns.
///
@@ -167,7 +176,7 @@ class SafeStack {
Instruction *BasePointer,
AllocaInst *StackGuardSlot);
- /// \brief Generate code to restore the stack after all stack restore points
+ /// Generate code to restore the stack after all stack restore points
/// in \p StackRestorePoints.
///
/// \returns A local variable in which to maintain the dynamic top of the
@@ -177,7 +186,7 @@ class SafeStack {
ArrayRef<Instruction *> StackRestorePoints,
Value *StaticTop, bool NeedDynamicTop);
- /// \brief Replace all allocas in \p DynamicAllocas with code to allocate
+ /// Replace all allocas in \p DynamicAllocas with code to allocate
/// space dynamically on the unsafe stack and store the dynamic unsafe stack
/// top to \p DynamicTop if non-null.
void moveDynamicAllocasToUnsafeStack(Function &F, Value *UnsafeStackPtr,
@@ -191,6 +200,9 @@ class SafeStack {
bool IsAccessSafe(Value *Addr, uint64_t Size, const Value *AllocaPtr,
uint64_t AllocaSize);
+ bool ShouldInlinePointerAddress(CallSite &CS);
+ void TryInlinePointerAddress();
+
public:
SafeStack(Function &F, const TargetLoweringBase &TL, const DataLayout &DL,
ScalarEvolution &SE)
@@ -230,16 +242,17 @@ bool SafeStack::IsAccessSafe(Value *Addr, uint64_t AccessSize,
ConstantRange(APInt(BitWidth, 0), APInt(BitWidth, AllocaSize));
bool Safe = AllocaRange.contains(AccessRange);
- DEBUG(dbgs() << "[SafeStack] "
- << (isa<AllocaInst>(AllocaPtr) ? "Alloca " : "ByValArgument ")
- << *AllocaPtr << "\n"
- << " Access " << *Addr << "\n"
- << " SCEV " << *Expr
- << " U: " << SE.getUnsignedRange(Expr)
- << ", S: " << SE.getSignedRange(Expr) << "\n"
- << " Range " << AccessRange << "\n"
- << " AllocaRange " << AllocaRange << "\n"
- << " " << (Safe ? "safe" : "unsafe") << "\n");
+ LLVM_DEBUG(
+ dbgs() << "[SafeStack] "
+ << (isa<AllocaInst>(AllocaPtr) ? "Alloca " : "ByValArgument ")
+ << *AllocaPtr << "\n"
+ << " Access " << *Addr << "\n"
+ << " SCEV " << *Expr
+ << " U: " << SE.getUnsignedRange(Expr)
+ << ", S: " << SE.getSignedRange(Expr) << "\n"
+ << " Range " << AccessRange << "\n"
+ << " AllocaRange " << AllocaRange << "\n"
+ << " " << (Safe ? "safe" : "unsafe") << "\n");
return Safe;
}
@@ -286,8 +299,9 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) {
case Instruction::Store:
if (V == I->getOperand(0)) {
// Stored the pointer - conservatively assume it may be unsafe.
- DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr
- << "\n store of address: " << *I << "\n");
+ LLVM_DEBUG(dbgs()
+ << "[SafeStack] Unsafe alloca: " << *AllocaPtr
+ << "\n store of address: " << *I << "\n");
return false;
}
@@ -312,9 +326,9 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) {
if (const MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
if (!IsMemIntrinsicSafe(MI, UI, AllocaPtr, AllocaSize)) {
- DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr
- << "\n unsafe memintrinsic: " << *I
- << "\n");
+ LLVM_DEBUG(dbgs()
+ << "[SafeStack] Unsafe alloca: " << *AllocaPtr
+ << "\n unsafe memintrinsic: " << *I << "\n");
return false;
}
continue;
@@ -332,8 +346,8 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) {
if (A->get() == V)
if (!(CS.doesNotCapture(A - B) && (CS.doesNotAccessMemory(A - B) ||
CS.doesNotAccessMemory()))) {
- DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr
- << "\n unsafe call: " << *I << "\n");
+ LLVM_DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr
+ << "\n unsafe call: " << *I << "\n");
return false;
}
continue;
@@ -545,6 +559,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
for (Argument *Arg : ByValArguments) {
unsigned Offset = SSL.getObjectOffset(Arg);
+ unsigned Align = SSL.getObjectAlignment(Arg);
Type *Ty = Arg->getType()->getPointerElementType();
uint64_t Size = DL.getTypeStoreSize(Ty);
@@ -561,7 +576,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
DIExpression::NoDeref, -Offset, DIExpression::NoDeref);
Arg->replaceAllUsesWith(NewArg);
IRB.SetInsertPoint(cast<Instruction>(NewArg)->getNextNode());
- IRB.CreateMemCpy(Off, Arg, Size, Arg->getParamAlignment());
+ IRB.CreateMemCpy(Off, Align, Arg, Arg->getParamAlignment(), Size);
}
// Allocate space for every unsafe static AllocaInst on the unsafe stack.
@@ -695,6 +710,35 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
}
}
+bool SafeStack::ShouldInlinePointerAddress(CallSite &CS) {
+ Function *Callee = CS.getCalledFunction();
+ if (CS.hasFnAttr(Attribute::AlwaysInline) && isInlineViable(*Callee))
+ return true;
+ if (Callee->isInterposable() || Callee->hasFnAttribute(Attribute::NoInline) ||
+ CS.isNoInline())
+ return false;
+ return true;
+}
+
+void SafeStack::TryInlinePointerAddress() {
+ if (!isa<CallInst>(UnsafeStackPtr))
+ return;
+
+ if(F.hasFnAttribute(Attribute::OptimizeNone))
+ return;
+
+ CallSite CS(UnsafeStackPtr);
+ Function *Callee = CS.getCalledFunction();
+ if (!Callee || Callee->isDeclaration())
+ return;
+
+ if (!ShouldInlinePointerAddress(CS))
+ return;
+
+ InlineFunctionInfo IFI;
+ InlineFunction(CS, IFI);
+}
+
bool SafeStack::run() {
assert(F.hasFnAttribute(Attribute::SafeStack) &&
"Can't run SafeStack on a function without the attribute");
@@ -731,7 +775,13 @@ bool SafeStack::run() {
++NumUnsafeStackRestorePointsFunctions;
IRBuilder<> IRB(&F.front(), F.begin()->getFirstInsertionPt());
- UnsafeStackPtr = TL.getSafeStackPointerLocation(IRB);
+ if (SafeStackUsePointerAddress) {
+ Value *Fn = F.getParent()->getOrInsertFunction(
+ "__safestack_pointer_address", StackPtrTy->getPointerTo(0));
+ UnsafeStackPtr = IRB.CreateCall(Fn);
+ } else {
+ UnsafeStackPtr = TL.getSafeStackPointerLocation(IRB);
+ }
// Load the current stack pointer (we'll also use it as a base pointer).
// FIXME: use a dedicated register for it ?
@@ -779,7 +829,9 @@ bool SafeStack::run() {
IRB.CreateStore(BasePointer, UnsafeStackPtr);
}
- DEBUG(dbgs() << "[SafeStack] safestack applied\n");
+ TryInlinePointerAddress();
+
+ LLVM_DEBUG(dbgs() << "[SafeStack] safestack applied\n");
return true;
}
@@ -800,17 +852,17 @@ public:
}
bool runOnFunction(Function &F) override {
- DEBUG(dbgs() << "[SafeStack] Function: " << F.getName() << "\n");
+ LLVM_DEBUG(dbgs() << "[SafeStack] Function: " << F.getName() << "\n");
if (!F.hasFnAttribute(Attribute::SafeStack)) {
- DEBUG(dbgs() << "[SafeStack] safestack is not requested"
- " for this function\n");
+ LLVM_DEBUG(dbgs() << "[SafeStack] safestack is not requested"
+ " for this function\n");
return false;
}
if (F.isDeclaration()) {
- DEBUG(dbgs() << "[SafeStack] function definition"
- " is not available\n");
+ LLVM_DEBUG(dbgs() << "[SafeStack] function definition"
+ " is not available\n");
return false;
}
diff --git a/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp b/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp
index 072e6e090e1e..329458778a98 100644
--- a/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp
+++ b/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp
@@ -12,6 +12,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Instruction.h"
@@ -101,10 +102,10 @@ void StackColoring::collectMarkers() {
// For each basic block, compute
// * the list of markers in the instruction order
// * the sets of allocas whose lifetime starts or ends in this BB
- DEBUG(dbgs() << "Instructions:\n");
+ LLVM_DEBUG(dbgs() << "Instructions:\n");
unsigned InstNo = 0;
for (BasicBlock *BB : depth_first(&F)) {
- DEBUG(dbgs() << " " << InstNo << ": BB " << BB->getName() << "\n");
+ LLVM_DEBUG(dbgs() << " " << InstNo << ": BB " << BB->getName() << "\n");
unsigned BBStart = InstNo++;
BlockLifetimeInfo &BlockInfo = BlockLiveness[BB];
@@ -121,9 +122,9 @@ void StackColoring::collectMarkers() {
}
auto ProcessMarker = [&](Instruction *I, const Marker &M) {
- DEBUG(dbgs() << " " << InstNo << ": "
- << (M.IsStart ? "start " : "end ") << M.AllocaNo << ", "
- << *I << "\n");
+ LLVM_DEBUG(dbgs() << " " << InstNo << ": "
+ << (M.IsStart ? "start " : "end ") << M.AllocaNo
+ << ", " << *I << "\n");
BBMarkers[BB].push_back({InstNo, M});
@@ -280,7 +281,7 @@ LLVM_DUMP_METHOD void StackColoring::dumpLiveRanges() {
#endif
void StackColoring::run() {
- DEBUG(dumpAllocas());
+ LLVM_DEBUG(dumpAllocas());
for (unsigned I = 0; I < NumAllocas; ++I)
AllocaNumbering[Allocas[I]] = I;
@@ -303,7 +304,7 @@ void StackColoring::run() {
LiveRanges[I] = getFullLiveRange();
calculateLocalLiveness();
- DEBUG(dumpBlockLiveness());
+ LLVM_DEBUG(dumpBlockLiveness());
calculateLiveIntervals();
- DEBUG(dumpLiveRanges());
+ LLVM_DEBUG(dumpLiveRanges());
}
diff --git a/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp b/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp
index b1759359e46f..07b6a5d1883b 100644
--- a/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp
+++ b/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp
@@ -42,6 +42,7 @@ LLVM_DUMP_METHOD void StackLayout::print(raw_ostream &OS) {
void StackLayout::addObject(const Value *V, unsigned Size, unsigned Alignment,
const StackColoring::LiveRange &Range) {
StackObjects.push_back({V, Size, Alignment, Range});
+ ObjectAlignments[V] = Alignment;
MaxAlignment = std::max(MaxAlignment, Alignment);
}
@@ -62,30 +63,30 @@ void StackLayout::layoutObject(StackObject &Obj) {
return;
}
- DEBUG(dbgs() << "Layout: size " << Obj.Size << ", align " << Obj.Alignment
- << ", range " << Obj.Range << "\n");
+ LLVM_DEBUG(dbgs() << "Layout: size " << Obj.Size << ", align "
+ << Obj.Alignment << ", range " << Obj.Range << "\n");
assert(Obj.Alignment <= MaxAlignment);
unsigned Start = AdjustStackOffset(0, Obj.Size, Obj.Alignment);
unsigned End = Start + Obj.Size;
- DEBUG(dbgs() << " First candidate: " << Start << " .. " << End << "\n");
+ LLVM_DEBUG(dbgs() << " First candidate: " << Start << " .. " << End << "\n");
for (const StackRegion &R : Regions) {
- DEBUG(dbgs() << " Examining region: " << R.Start << " .. " << R.End
- << ", range " << R.Range << "\n");
+ LLVM_DEBUG(dbgs() << " Examining region: " << R.Start << " .. " << R.End
+ << ", range " << R.Range << "\n");
assert(End >= R.Start);
if (Start >= R.End) {
- DEBUG(dbgs() << " Does not intersect, skip.\n");
+ LLVM_DEBUG(dbgs() << " Does not intersect, skip.\n");
continue;
}
if (Obj.Range.Overlaps(R.Range)) {
// Find the next appropriate location.
Start = AdjustStackOffset(R.End, Obj.Size, Obj.Alignment);
End = Start + Obj.Size;
- DEBUG(dbgs() << " Overlaps. Next candidate: " << Start << " .. " << End
- << "\n");
+ LLVM_DEBUG(dbgs() << " Overlaps. Next candidate: " << Start << " .. "
+ << End << "\n");
continue;
}
if (End <= R.End) {
- DEBUG(dbgs() << " Reusing region(s).\n");
+ LLVM_DEBUG(dbgs() << " Reusing region(s).\n");
break;
}
}
@@ -94,13 +95,13 @@ void StackLayout::layoutObject(StackObject &Obj) {
if (End > LastRegionEnd) {
// Insert a new region at the end. Maybe two.
if (Start > LastRegionEnd) {
- DEBUG(dbgs() << " Creating gap region: " << LastRegionEnd << " .. "
- << Start << "\n");
+ LLVM_DEBUG(dbgs() << " Creating gap region: " << LastRegionEnd << " .. "
+ << Start << "\n");
Regions.emplace_back(LastRegionEnd, Start, StackColoring::LiveRange());
LastRegionEnd = Start;
}
- DEBUG(dbgs() << " Creating new region: " << LastRegionEnd << " .. " << End
- << ", range " << Obj.Range << "\n");
+ LLVM_DEBUG(dbgs() << " Creating new region: " << LastRegionEnd << " .. "
+ << End << ", range " << Obj.Range << "\n");
Regions.emplace_back(LastRegionEnd, End, Obj.Range);
LastRegionEnd = End;
}
@@ -149,5 +150,5 @@ void StackLayout::computeLayout() {
for (auto &Obj : StackObjects)
layoutObject(Obj);
- DEBUG(print(dbgs()));
+ LLVM_DEBUG(print(dbgs()));
}
diff --git a/contrib/llvm/lib/CodeGen/SafeStackLayout.h b/contrib/llvm/lib/CodeGen/SafeStackLayout.h
index 7c1292f251f7..ac531d800f6e 100644
--- a/contrib/llvm/lib/CodeGen/SafeStackLayout.h
+++ b/contrib/llvm/lib/CodeGen/SafeStackLayout.h
@@ -47,6 +47,7 @@ class StackLayout {
SmallVector<StackObject, 8> StackObjects;
DenseMap<const Value *, unsigned> ObjectOffsets;
+ DenseMap<const Value *, unsigned> ObjectAlignments;
void layoutObject(StackObject &Obj);
@@ -64,6 +65,9 @@ public:
/// Returns the offset to the object start in the stack frame.
unsigned getObjectOffset(const Value *V) { return ObjectOffsets[V]; }
+ /// Returns the alignment of the object.
+ unsigned getObjectAlignment(const Value *V) { return ObjectAlignments[V]; }
+
/// Returns the size of the entire frame.
unsigned getFrameSize() { return Regions.empty() ? 0 : Regions.back().End; }
diff --git a/contrib/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/contrib/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
index cef413f9d410..9387722bfebd 100644
--- a/contrib/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
+++ b/contrib/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
@@ -586,9 +586,6 @@ static void scalarizeMaskedScatter(CallInst *CI) {
}
bool ScalarizeMaskedMemIntrin::runOnFunction(Function &F) {
- if (skipFunction(F))
- return false;
-
bool EverMadeChange = false;
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
index 0635e8f41ee7..46064012d9d8 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 9249fa84b38b..d1c5ddabb975 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -38,6 +38,7 @@
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
@@ -118,7 +119,7 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
DbgValues.clear();
const TargetSubtargetInfo &ST = mf.getSubtarget();
- SchedModel.init(ST.getSchedModel(), &ST, TII);
+ SchedModel.init(&ST);
}
/// If this machine instr has memory reference information and it can be
@@ -266,7 +267,7 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
}
}
-/// \brief Adds register dependencies (data, anti, and output) from this SUnit
+/// Adds register dependencies (data, anti, and output) from this SUnit
/// to following instructions in the same scheduling region that depend on the
/// physical register referenced at OperIdx.
void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
@@ -317,13 +318,14 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
} else {
addPhysRegDataDeps(SU, OperIdx);
- // clear this register's use list
- if (Uses.contains(Reg))
- Uses.eraseAll(Reg);
-
- if (!MO.isDead()) {
- Defs.eraseAll(Reg);
- } else if (SU->isCall) {
+ // Clear previous uses and defs of this register and its subregisters.
+ for (MCSubRegIterator SubReg(Reg, TRI, true); SubReg.isValid(); ++SubReg) {
+ if (Uses.contains(*SubReg))
+ Uses.eraseAll(*SubReg);
+ if (!MO.isDead())
+ Defs.eraseAll(*SubReg);
+ }
+ if (MO.isDead() && SU->isCall) {
// Calls will not be reordered because of chain dependencies (see
// below). Since call operands are dead, calls may continue to be added
// to the DefList making dependence checking quadratic in the size of
@@ -468,7 +470,7 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU));
}
-/// \brief Adds a register data dependency if the instruction that defines the
+/// Adds a register data dependency if the instruction that defines the
/// virtual register used at OperIdx is mapped to an SUnit. Add a register
/// antidependency from this SUnit to instructions that occur later in the same
/// scheduling region if they write the virtual register.
@@ -514,7 +516,7 @@ void ScheduleDAGInstrs::addChainDependency (SUnit *SUa, SUnit *SUb,
}
}
-/// \brief Creates an SUnit for each real instruction, numbered in top-down
+/// Creates an SUnit for each real instruction, numbered in top-down
/// topological order. The instruction order A < B implies that no edge exists
/// from B to A.
///
@@ -532,7 +534,7 @@ void ScheduleDAGInstrs::initSUnits() {
SUnits.reserve(NumRegionInstrs);
for (MachineInstr &MI : make_range(RegionBegin, RegionEnd)) {
- if (MI.isDebugValue())
+ if (MI.isDebugInstr())
continue;
SUnit *SU = newSUnit(&MI);
@@ -763,6 +765,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
DbgMI = &MI;
continue;
}
+ if (MI.isDebugLabel())
+ continue;
+
SUnit *SU = MISUnitMap[&MI];
assert(SU && "No SUnit mapped to this MI");
@@ -845,8 +850,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
BarrierChain->addPredBarrier(SU);
BarrierChain = SU;
- DEBUG(dbgs() << "Global memory object and new barrier chain: SU("
- << BarrierChain->NodeNum << ").\n";);
+ LLVM_DEBUG(dbgs() << "Global memory object and new barrier chain: SU("
+ << BarrierChain->NodeNum << ").\n";);
// Add dependencies against everything below it and clear maps.
addBarrierChain(Stores);
@@ -934,11 +939,12 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
// Reduce maps if they grow huge.
if (Stores.size() + Loads.size() >= HugeRegion) {
- DEBUG(dbgs() << "Reducing Stores and Loads maps.\n";);
+ LLVM_DEBUG(dbgs() << "Reducing Stores and Loads maps.\n";);
reduceHugeMemNodeMaps(Stores, Loads, getReductionSize());
}
if (NonAliasStores.size() + NonAliasLoads.size() >= HugeRegion) {
- DEBUG(dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n";);
+ LLVM_DEBUG(
+ dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n";);
reduceHugeMemNodeMaps(NonAliasStores, NonAliasLoads, getReductionSize());
}
}
@@ -978,10 +984,8 @@ void ScheduleDAGInstrs::Value2SUsMap::dump() {
void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
Value2SUsMap &loads, unsigned N) {
- DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n";
- stores.dump();
- dbgs() << "Loading SUnits:\n";
- loads.dump());
+ LLVM_DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n"; stores.dump();
+ dbgs() << "Loading SUnits:\n"; loads.dump());
// Insert all SU's NodeNums into a vector and sort it.
std::vector<unsigned> NodeNums;
@@ -992,7 +996,7 @@ void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
for (auto &I : loads)
for (auto *SU : I.second)
NodeNums.push_back(SU->NodeNum);
- std::sort(NodeNums.begin(), NodeNums.end());
+ llvm::sort(NodeNums.begin(), NodeNums.end());
// The N last elements in NodeNums will be removed, and the SU with
// the lowest NodeNum of them will become the new BarrierChain to
@@ -1007,12 +1011,12 @@ void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
if (newBarrierChain->NodeNum < BarrierChain->NodeNum) {
BarrierChain->addPredBarrier(newBarrierChain);
BarrierChain = newBarrierChain;
- DEBUG(dbgs() << "Inserting new barrier chain: SU("
- << BarrierChain->NodeNum << ").\n";);
+ LLVM_DEBUG(dbgs() << "Inserting new barrier chain: SU("
+ << BarrierChain->NodeNum << ").\n";);
}
else
- DEBUG(dbgs() << "Keeping old barrier chain: SU("
- << BarrierChain->NodeNum << ").\n";);
+ LLVM_DEBUG(dbgs() << "Keeping old barrier chain: SU("
+ << BarrierChain->NodeNum << ").\n";);
}
else
BarrierChain = newBarrierChain;
@@ -1020,10 +1024,8 @@ void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
insertBarrierChain(stores);
insertBarrierChain(loads);
- DEBUG(dbgs() << "After reduction:\nStoring SUnits:\n";
- stores.dump();
- dbgs() << "Loading SUnits:\n";
- loads.dump());
+ LLVM_DEBUG(dbgs() << "After reduction:\nStoring SUnits:\n"; stores.dump();
+ dbgs() << "Loading SUnits:\n"; loads.dump());
}
static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs,
@@ -1044,14 +1046,14 @@ static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs,
}
void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
- DEBUG(dbgs() << "Fixup kills for " << printMBBReference(MBB) << '\n');
+ LLVM_DEBUG(dbgs() << "Fixup kills for " << printMBBReference(MBB) << '\n');
LiveRegs.init(*TRI);
LiveRegs.addLiveOuts(MBB);
// Examine block from end to start...
for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
- if (MI.isDebugValue())
+ if (MI.isDebugInstr())
continue;
// Update liveness. Registers that are defed but not used in this
@@ -1087,7 +1089,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
while (I->isBundledWithSucc())
++I;
do {
- if (!I->isDebugValue())
+ if (!I->isDebugInstr())
toggleKills(MRI, LiveRegs, *I, true);
--I;
} while(I != First);
@@ -1212,7 +1214,7 @@ public:
RootSet[SU->NodeNum] = RData;
}
- /// \brief Called once for each tree edge after calling visitPostOrderNode on
+ /// Called once for each tree edge after calling visitPostOrderNode on
/// the predecessor. Increment the parent node's instruction count and
/// preemptively join this subtree to its parent's if it is small enough.
void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) {
@@ -1245,11 +1247,11 @@ public:
}
R.SubtreeConnections.resize(SubtreeClasses.getNumClasses());
R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses());
- DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n");
+ LLVM_DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n");
for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) {
R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx];
- DEBUG(dbgs() << " SU(" << Idx << ") in tree "
- << R.DFSNodeData[Idx].SubtreeID << '\n');
+ LLVM_DEBUG(dbgs() << " SU(" << Idx << ") in tree "
+ << R.DFSNodeData[Idx].SubtreeID << '\n');
}
for (const std::pair<const SUnit*, const SUnit*> &P : ConnectionPairs) {
unsigned PredTree = SubtreeClasses[P.first->NodeNum];
@@ -1404,8 +1406,8 @@ void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
for (const Connection &C : SubtreeConnections[SubtreeID]) {
SubtreeConnectLevels[C.TreeID] =
std::max(SubtreeConnectLevels[C.TreeID], C.Level);
- DEBUG(dbgs() << " Tree: " << C.TreeID
- << " @" << SubtreeConnectLevels[C.TreeID] << '\n');
+ LLVM_DEBUG(dbgs() << " Tree: " << C.TreeID << " @"
+ << SubtreeConnectLevels[C.TreeID] << '\n');
}
}
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
index 37c4a470bd0a..ff2085aae865 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -61,7 +61,7 @@ namespace llvm {
}
- std::string getNodeLabel(const SUnit *Node, const ScheduleDAG *Graph);
+ std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *Graph);
static std::string getNodeAttributes(const SUnit *N,
const ScheduleDAG *Graph) {
return "shape=Mrecord";
diff --git a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index b789e2d9c52c..b8bfe69a76e1 100644
--- a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
+++ b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -16,6 +16,7 @@
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/Compiler.h"
@@ -68,12 +69,12 @@ ScoreboardHazardRecognizer::ScoreboardHazardRecognizer(
// If MaxLookAhead is not set above, then we are not enabled.
if (!isEnabled())
- DEBUG(dbgs() << "Disabled scoreboard hazard recognizer\n");
+ LLVM_DEBUG(dbgs() << "Disabled scoreboard hazard recognizer\n");
else {
// A nonempty itinerary must have a SchedModel.
IssueWidth = ItinData->SchedModel.IssueWidth;
- DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = "
- << ScoreboardDepth << '\n');
+ LLVM_DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = "
+ << ScoreboardDepth << '\n');
}
}
@@ -155,9 +156,9 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
}
if (!freeUnits) {
- DEBUG(dbgs() << "*** Hazard in cycle +" << StageCycle << ", ");
- DEBUG(dbgs() << "SU(" << SU->NodeNum << "): ");
- DEBUG(DAG->dumpNode(SU));
+ LLVM_DEBUG(dbgs() << "*** Hazard in cycle +" << StageCycle << ", ");
+ LLVM_DEBUG(dbgs() << "SU(" << SU->NodeNum << "): ");
+ LLVM_DEBUG(DAG->dumpNode(SU));
return Hazard;
}
}
@@ -223,8 +224,8 @@ void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) {
cycle += IS->getNextCycles();
}
- DEBUG(ReservedScoreboard.dump());
- DEBUG(RequiredScoreboard.dump());
+ LLVM_DEBUG(ReservedScoreboard.dump());
+ LLVM_DEBUG(RequiredScoreboard.dump());
}
void ScoreboardHazardRecognizer::AdvanceCycle() {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 03cb2e310c7e..7a99687757f8 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -36,7 +36,6 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
@@ -60,6 +59,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
+#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -122,7 +122,7 @@ namespace {
bool LegalTypes = false;
bool ForCodeSize;
- /// \brief Worklist of all of the nodes that need to be simplified.
+ /// Worklist of all of the nodes that need to be simplified.
///
/// This must behave as a stack -- new nodes to process are pushed onto the
/// back and when processing we pop off of the back.
@@ -131,14 +131,14 @@ namespace {
/// due to nodes being deleted from the underlying DAG.
SmallVector<SDNode *, 64> Worklist;
- /// \brief Mapping from an SDNode to its position on the worklist.
+ /// Mapping from an SDNode to its position on the worklist.
///
/// This is used to find and remove nodes from the worklist (by nulling
/// them) when they are deleted from the underlying DAG. It relies on
/// stable indices of nodes within the worklist.
DenseMap<SDNode *, unsigned> WorklistMap;
- /// \brief Set of nodes which have been combined (at least once).
+ /// Set of nodes which have been combined (at least once).
///
/// This is used to allow us to reliably add any operands of a DAG node
/// which have not yet been combined to the worklist.
@@ -232,14 +232,25 @@ namespace {
return SimplifyDemandedBits(Op, Demanded);
}
+ /// Check the specified vector node value to see if it can be simplified or
+ /// if things it uses can be simplified as it only uses some of the
+ /// elements. If so, return true.
+ bool SimplifyDemandedVectorElts(SDValue Op) {
+ unsigned NumElts = Op.getValueType().getVectorNumElements();
+ APInt Demanded = APInt::getAllOnesValue(NumElts);
+ return SimplifyDemandedVectorElts(Op, Demanded);
+ }
+
bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
+ bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
+ bool AssumeSingleUse = false);
bool CombineToPreIndexedLoadStore(SDNode *N);
bool CombineToPostIndexedLoadStore(SDNode *N);
SDValue SplitIndexingFromLoad(LoadSDNode *LD);
bool SliceUpLoad(SDNode *N);
- /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
+ /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
/// load.
///
/// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
@@ -258,10 +269,6 @@ namespace {
SDValue PromoteExtend(SDValue Op);
bool PromoteLoad(SDValue Op);
- void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, SDValue Trunc,
- SDValue ExtLoad, const SDLoc &DL,
- ISD::NodeType ExtType);
-
/// Call the node-specific routine that knows how to fold each
/// particular type of node. If that doesn't do anything, try the
/// target-specific DAG combines.
@@ -292,7 +299,9 @@ namespace {
SDValue visitMUL(SDNode *N);
SDValue useDivRem(SDNode *N);
SDValue visitSDIV(SDNode *N);
+ SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
SDValue visitUDIV(SDNode *N);
+ SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
SDValue visitREM(SDNode *N);
SDValue visitMULHU(SDNode *N);
SDValue visitMULHS(SDNode *N);
@@ -302,9 +311,9 @@ namespace {
SDValue visitUMULO(SDNode *N);
SDValue visitIMINMAX(SDNode *N);
SDValue visitAND(SDNode *N);
- SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
+ SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
SDValue visitOR(SDNode *N);
- SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
+ SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
SDValue visitXOR(SDNode *N);
SDValue SimplifyVBinOp(SDNode *N);
SDValue visitSHL(SDNode *N);
@@ -323,7 +332,6 @@ namespace {
SDValue visitVSELECT(SDNode *N);
SDValue visitSELECT_CC(SDNode *N);
SDValue visitSETCC(SDNode *N);
- SDValue visitSETCCE(SDNode *N);
SDValue visitSETCCCARRY(SDNode *N);
SDValue visitSIGN_EXTEND(SDNode *N);
SDValue visitZERO_EXTEND(SDNode *N);
@@ -385,8 +393,8 @@ namespace {
SDValue visitFMULForFMADistributiveCombine(SDNode *N);
SDValue XformToShuffleWithZero(SDNode *N);
- SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS,
- SDValue RHS);
+ SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
+ SDValue N1);
SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
@@ -403,8 +411,11 @@ namespace {
SDValue N2, SDValue N3, ISD::CondCode CC);
SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
const SDLoc &DL);
+ SDValue unfoldMaskedMerge(SDNode *N);
+ SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
- const SDLoc &DL, bool foldBooleans = true);
+ const SDLoc &DL, bool foldBooleans);
+ SDValue rebuildSetCC(SDValue N);
bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
SDValue &CC) const;
@@ -414,20 +425,21 @@ namespace {
unsigned HiOp);
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
SDValue CombineExtLoad(SDNode *N);
+ SDValue CombineZExtLogicopShiftLoad(SDNode *N);
SDValue combineRepeatedFPDivisors(SDNode *N);
SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
SDValue BuildSDIV(SDNode *N);
SDValue BuildSDIVPow2(SDNode *N);
SDValue BuildUDIV(SDNode *N);
- SDValue BuildLogBase2(SDValue Op, const SDLoc &DL);
+ SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
- SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
+ SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
SDNodeFlags Flags, bool Reciprocal);
- SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
+ SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
SDNodeFlags Flags, bool Reciprocal);
SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
bool DemandHighBits = true);
@@ -442,13 +454,14 @@ namespace {
SDValue ReduceLoadOpStoreWidth(SDNode *N);
SDValue splitMergedValStore(StoreSDNode *ST);
SDValue TransformFPLoadStorePair(SDNode *N);
+ SDValue convertBuildVecZextToZext(SDNode *N);
SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
SDValue reduceBuildVecToShuffle(SDNode *N);
SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
ArrayRef<int> VectorMask, SDValue VecIn1,
SDValue VecIn2, unsigned LeftIdx);
- SDValue matchVSelectOpSizesWithSetCC(SDNode *N);
+ SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
/// Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
@@ -500,15 +513,15 @@ namespace {
bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
EVT LoadResultTy, EVT &ExtVT);
- /// Helper function to calculate whether the given Load can have its
+ /// Helper function to calculate whether the given Load/Store can have its
/// width reduced to ExtVT.
- bool isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType,
- EVT &ExtVT, unsigned ShAmt = 0);
+ bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
+ EVT &MemVT, unsigned ShAmt = 0);
/// Used by BackwardsPropagateMask to find suitable loads.
bool SearchForAndLoads(SDNode *N, SmallPtrSetImpl<LoadSDNode*> &Loads,
- SmallPtrSetImpl<SDNode*> &NodeWithConsts,
- ConstantSDNode *Mask, SDNode *&UncombinedNode);
+ SmallPtrSetImpl<SDNode*> &NodesWithConsts,
+ ConstantSDNode *Mask, SDNode *&NodeToMask);
/// Attempt to propagate a given AND node back to load leaves so that they
/// can be combined into narrow loads.
bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
@@ -530,23 +543,28 @@ namespace {
/// This is a helper function for MergeConsecutiveStores. Stores
/// that potentially may be merged with St are placed in
- /// StoreNodes.
+ /// StoreNodes. RootNode is a chain predecessor to all store
+ /// candidates.
void getStoreMergeCandidates(StoreSDNode *St,
- SmallVectorImpl<MemOpLink> &StoreNodes);
+ SmallVectorImpl<MemOpLink> &StoreNodes,
+ SDNode *&Root);
/// Helper function for MergeConsecutiveStores. Checks if
/// candidate stores have indirect dependency through their
- /// operands. \return True if safe to merge.
+ /// operands. RootNode is the predecessor to all stores calculated
+ /// by getStoreMergeCandidates and is used to prune the dependency check.
+ /// \return True if safe to merge.
bool checkMergeStoreCandidatesForDependencies(
- SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores);
+ SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
+ SDNode *RootNode);
/// Merge consecutive store operations into a wide store.
/// This optimization uses wide integers or vectors when possible.
/// \return number of stores that were merged into a merged store (the
/// affected nodes are stored as a prefix in \p StoreNodes).
- bool MergeConsecutiveStores(StoreSDNode *N);
+ bool MergeConsecutiveStores(StoreSDNode *St);
- /// \brief Try to transform a truncation where C is a constant:
+ /// Try to transform a truncation where C is a constant:
/// (trunc (and X, C)) -> (and (trunc X), (trunc C))
///
/// \p N needs to be a truncation and its first operand an AND. Other
@@ -554,6 +572,16 @@ namespace {
/// single-use) and if missed an empty SDValue is returned.
SDValue distributeTruncateThroughAnd(SDNode *N);
+ /// Helper function to determine whether the target supports the operation
+ /// given by \p Opcode for type \p VT, that is, whether the operation
+ /// is legal or custom before legalizing operations, and whether it is
+ /// legal (but not custom) after legalization.
+ bool hasOperation(unsigned Opcode, EVT VT) {
+ if (LegalOperations)
+ return TLI.isOperationLegal(Opcode, VT);
+ return TLI.isOperationLegalOrCustom(Opcode, VT);
+ }
+
public:
/// Runs the dag combiner on all nodes in the work list
void Run(CombineLevel AtLevel);
@@ -564,11 +592,7 @@ namespace {
/// legalization these can be huge.
EVT getShiftAmountTy(EVT LHSTy) {
assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
- if (LHSTy.isVector())
- return LHSTy;
- auto &DL = DAG.getDataLayout();
- return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy)
- : TLI.getPointerTy(DL);
+ return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
}
/// This method returns true if we are running before type legalization or
@@ -582,6 +606,10 @@ namespace {
EVT getSetCCResultType(EVT VT) const {
return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
}
+
+ void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
+ SDValue OrigLoad, SDValue ExtLoad,
+ ISD::NodeType ExtType);
};
/// This class is a DAGUpdateListener that removes any deleted
@@ -657,8 +685,13 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
// fneg is removable even if it has multiple uses.
if (Op.getOpcode() == ISD::FNEG) return 2;
- // Don't allow anything with multiple uses.
- if (!Op.hasOneUse()) return 0;
+ // Don't allow anything with multiple uses unless we know it is free.
+ EVT VT = Op.getValueType();
+ const SDNodeFlags Flags = Op->getFlags();
+ if (!Op.hasOneUse())
+ if (!(Op.getOpcode() == ISD::FP_EXTEND &&
+ TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
+ return 0;
// Don't recurse exponentially.
if (Depth > 6) return 0;
@@ -671,17 +704,15 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
// Don't invert constant FP values after legalization unless the target says
// the negated constant is legal.
- EVT VT = Op.getValueType();
return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
}
case ISD::FADD:
- // FIXME: determine better conditions for this xform.
- if (!Options->UnsafeFPMath) return 0;
+ if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
+ return 0;
// After operation legalization, it might not be legal to create new FSUBs.
- if (LegalOperations &&
- !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType()))
+ if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
return 0;
// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
@@ -694,7 +725,7 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
if (!Options->NoSignedZerosFPMath &&
- !Op.getNode()->getFlags().hasNoSignedZeros())
+ !Flags.hasNoSignedZeros())
return 0;
// fold (fneg (fsub A, B)) -> (fsub B, A)
@@ -702,8 +733,6 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
case ISD::FMUL:
case ISD::FDIV:
- if (Options->HonorSignDependentRoundingFPMath()) return 0;
-
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
Options, Depth + 1))
@@ -727,9 +756,6 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
// fneg is removable even if it has multiple uses.
if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
- // Don't allow anything with multiple uses.
- assert(Op.hasOneUse() && "Unknown reuse!");
-
assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
const SDNodeFlags Flags = Op.getNode()->getFlags();
@@ -742,8 +768,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
}
case ISD::FADD:
- // FIXME: determine better conditions for this xform.
- assert(Options.UnsafeFPMath);
+ assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());
// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
@@ -769,8 +794,6 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
case ISD::FMUL:
case ISD::FDIV:
- assert(!Options.HonorSignDependentRoundingFPMath());
-
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
DAG.getTargetLoweringInfo(), &Options, Depth+1))
@@ -846,7 +869,13 @@ bool DAGCombiner::isOneUseSetCC(SDValue N) const {
return false;
}
-// \brief Returns the SDNode if it is a constant float BuildVector
+static SDValue peekThroughBitcast(SDValue V) {
+ while (V.getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+ return V;
+}
+
+// Returns the SDNode if it is a constant float BuildVector
// or constant float.
static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
if (isa<ConstantFPSDNode>(N))
@@ -880,6 +909,7 @@ static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
// constant null integer (with no undefs).
// Build vector implicit truncation is not an issue for null values.
static bool isNullConstantOrNullSplatConstant(SDValue N) {
+ // TODO: may want to use peekThroughBitcast() here.
if (ConstantSDNode *Splat = isConstOrConstSplat(N))
return Splat->isNullValue();
return false;
@@ -889,6 +919,7 @@ static bool isNullConstantOrNullSplatConstant(SDValue N) {
// constant integer of one (with no undefs).
// Do not permit build vector implicit truncation.
static bool isOneConstantOrOneSplatConstant(SDValue N) {
+ // TODO: may want to use peekThroughBitcast() here.
unsigned BitWidth = N.getScalarValueSizeInBits();
if (ConstantSDNode *Splat = isConstOrConstSplat(N))
return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
@@ -899,6 +930,7 @@ static bool isOneConstantOrOneSplatConstant(SDValue N) {
// constant integer of all ones (with no undefs).
// Do not permit build vector implicit truncation.
static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
+ N = peekThroughBitcast(N);
unsigned BitWidth = N.getScalarValueSizeInBits();
if (ConstantSDNode *Splat = isConstOrConstSplat(N))
return Splat->isAllOnesValue() &&
@@ -913,56 +945,6 @@ static bool isAnyConstantBuildVector(const SDNode *N) {
ISD::isBuildVectorOfConstantFPSDNodes(N);
}
-// Attempt to match a unary predicate against a scalar/splat constant or
-// every element of a constant BUILD_VECTOR.
-static bool matchUnaryPredicate(SDValue Op,
- std::function<bool(ConstantSDNode *)> Match) {
- if (auto *Cst = dyn_cast<ConstantSDNode>(Op))
- return Match(Cst);
-
- if (ISD::BUILD_VECTOR != Op.getOpcode())
- return false;
-
- EVT SVT = Op.getValueType().getScalarType();
- for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
- auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i));
- if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst))
- return false;
- }
- return true;
-}
-
-// Attempt to match a binary predicate against a pair of scalar/splat constants
-// or every element of a pair of constant BUILD_VECTORs.
-static bool matchBinaryPredicate(
- SDValue LHS, SDValue RHS,
- std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match) {
- if (LHS.getValueType() != RHS.getValueType())
- return false;
-
- if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS))
- if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS))
- return Match(LHSCst, RHSCst);
-
- if (ISD::BUILD_VECTOR != LHS.getOpcode() ||
- ISD::BUILD_VECTOR != RHS.getOpcode())
- return false;
-
- EVT SVT = LHS.getValueType().getScalarType();
- for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
- auto *LHSCst = dyn_cast<ConstantSDNode>(LHS.getOperand(i));
- auto *RHSCst = dyn_cast<ConstantSDNode>(RHS.getOperand(i));
- if (!LHSCst || !RHSCst)
- return false;
- if (LHSCst->getValueType(0) != SVT ||
- LHSCst->getValueType(0) != RHSCst->getValueType(0))
- return false;
- if (!Match(LHSCst, RHSCst))
- return false;
- }
- return true;
-}
-
SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
SDValue N1) {
EVT VT = N0.getValueType();
@@ -1013,11 +995,9 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
bool AddTo) {
assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
++NodesCombined;
- DEBUG(dbgs() << "\nReplacing.1 ";
- N->dump(&DAG);
- dbgs() << "\nWith: ";
- To[0].getNode()->dump(&DAG);
- dbgs() << " and " << NumTo-1 << " other values\n");
+ LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
+ To[0].getNode()->dump(&DAG);
+ dbgs() << " and " << NumTo - 1 << " other values\n");
for (unsigned i = 0, e = NumTo; i != e; ++i)
assert((!To[i].getNode() ||
N->getValueType(i) == To[i].getValueType()) &&
@@ -1074,11 +1054,33 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
// Replace the old value with the new one.
++NodesCombined;
- DEBUG(dbgs() << "\nReplacing.2 ";
- TLO.Old.getNode()->dump(&DAG);
- dbgs() << "\nWith: ";
- TLO.New.getNode()->dump(&DAG);
- dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
+ dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
+ dbgs() << '\n');
+
+ CommitTargetLoweringOpt(TLO);
+ return true;
+}
+
+/// Check the specified vector node value to see if it can be simplified or
+/// if things it uses can be simplified as it only uses some of the elements.
+/// If so, return true.
+bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
+ bool AssumeSingleUse) {
+ TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
+ APInt KnownUndef, KnownZero;
+ if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO,
+ 0, AssumeSingleUse))
+ return false;
+
+ // Revisit the node.
+ AddToWorklist(Op.getNode());
+
+ // Replace the old value with the new one.
+ ++NodesCombined;
+ LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
+ dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
+ dbgs() << '\n');
CommitTargetLoweringOpt(TLO);
return true;
@@ -1089,11 +1091,8 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
EVT VT = Load->getValueType(0);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
- DEBUG(dbgs() << "\nReplacing.9 ";
- Load->dump(&DAG);
- dbgs() << "\nWith: ";
- Trunc.getNode()->dump(&DAG);
- dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
+ Trunc.getNode()->dump(&DAG); dbgs() << '\n');
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
@@ -1107,10 +1106,8 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
if (ISD::isUNINDEXEDLoad(Op.getNode())) {
LoadSDNode *LD = cast<LoadSDNode>(Op);
EVT MemVT = LD->getMemoryVT();
- ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
- ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
- : ISD::EXTLOAD)
- : LD->getExtensionType();
+ ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
+ : LD->getExtensionType();
Replace = true;
return DAG.getExtLoad(ExtType, DL, PVT,
LD->getChain(), LD->getBasePtr(),
@@ -1194,7 +1191,7 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
- DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
bool Replace0 = false;
SDValue N0 = Op.getOperand(0);
@@ -1259,7 +1256,7 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
- DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
bool Replace = false;
SDValue N0 = Op.getOperand(0);
@@ -1311,8 +1308,7 @@ SDValue DAGCombiner::PromoteExtend(SDValue Op) {
// fold (aext (aext x)) -> (aext x)
// fold (aext (zext x)) -> (zext x)
// fold (aext (sext x)) -> (sext x)
- DEBUG(dbgs() << "\nPromoting ";
- Op.getNode()->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
}
return SDValue();
@@ -1345,20 +1341,15 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
SDNode *N = Op.getNode();
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT MemVT = LD->getMemoryVT();
- ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
- ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
- : ISD::EXTLOAD)
- : LD->getExtensionType();
+ ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
+ : LD->getExtensionType();
SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
LD->getChain(), LD->getBasePtr(),
MemVT, LD->getMemOperand());
SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
- DEBUG(dbgs() << "\nPromoting ";
- N->dump(&DAG);
- dbgs() << "\nTo: ";
- Result.getNode()->dump(&DAG);
- dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
+ Result.getNode()->dump(&DAG); dbgs() << '\n');
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
@@ -1369,7 +1360,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
return false;
}
-/// \brief Recursively delete a node which has no uses and any operands for
+/// Recursively delete a node which has no uses and any operands for
/// which it is the only use.
///
/// Note that this both deletes the nodes and removes them from the worklist.
@@ -1453,7 +1444,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
continue;
}
- DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
// Add any operands of the new node which have not yet been combined to the
// worklist as well. Because the worklist uniques things already, this
@@ -1481,8 +1472,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
RV.getOpcode() != ISD::DELETED_NODE &&
"Node was deleted but visit returned new node!");
- DEBUG(dbgs() << " ... into: ";
- RV.getNode()->dump(&DAG));
+ LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
if (N->getNumValues() == RV.getNode()->getNumValues())
DAG.ReplaceAllUsesWith(N, RV.getNode());
@@ -1558,7 +1548,6 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::VSELECT: return visitVSELECT(N);
case ISD::SELECT_CC: return visitSELECT_CC(N);
case ISD::SETCC: return visitSETCC(N);
- case ISD::SETCCE: return visitSETCCE(N);
case ISD::SETCCCARRY: return visitSETCCCARRY(N);
case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
@@ -1708,6 +1697,10 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
return N->getOperand(1);
}
+ // Don't simplify token factors if optnone.
+ if (OptLevel == CodeGenOpt::None)
+ return SDValue();
+
SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
SmallPtrSet<SDNode*, 16> SeenOps;
@@ -1893,16 +1886,16 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
"Unexpected binary operator");
- // Bail out if any constants are opaque because we can't constant fold those.
- SDValue C1 = BO->getOperand(1);
- if (!isConstantOrConstantVector(C1, true) &&
- !isConstantFPBuildVectorOrConstantFP(C1))
- return SDValue();
-
// Don't do this unless the old select is going away. We want to eliminate the
// binary operator, not replace a binop with a select.
// TODO: Handle ISD::SELECT_CC.
+ unsigned SelOpNo = 0;
SDValue Sel = BO->getOperand(0);
+ if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
+ SelOpNo = 1;
+ Sel = BO->getOperand(1);
+ }
+
if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
return SDValue();
@@ -1916,19 +1909,48 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
!isConstantFPBuildVectorOrConstantFP(CF))
return SDValue();
+ // Bail out if any constants are opaque because we can't constant fold those.
+ // The exception is "and" and "or" with either 0 or -1 in which case we can
+ // propagate non constant operands into select. I.e.:
+ // and (select Cond, 0, -1), X --> select Cond, 0, X
+ // or X, (select Cond, -1, 0) --> select Cond, -1, X
+ bool CanFoldNonConst = (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
+ (isNullConstantOrNullSplatConstant(CT) ||
+ isAllOnesConstantOrAllOnesSplatConstant(CT)) &&
+ (isNullConstantOrNullSplatConstant(CF) ||
+ isAllOnesConstantOrAllOnesSplatConstant(CF));
+
+ SDValue CBO = BO->getOperand(SelOpNo ^ 1);
+ if (!CanFoldNonConst &&
+ !isConstantOrConstantVector(CBO, true) &&
+ !isConstantFPBuildVectorOrConstantFP(CBO))
+ return SDValue();
+
+ EVT VT = Sel.getValueType();
+
+ // In the case of a shift, the value and the shift amount may have different
+ // VTs. For instance, on x86 the shift amount is i8 regardless of the LHS
+ // type. Bail out if the operands were swapped and the value types do not
+ // match. NB: x86 is fine if the operands are not swapped, provided the shift
+ // amount VT is no bigger than that of the shifted value.
+ // TODO: it is possible to check for a shift operation, correct the VTs and
+ // still perform the optimization on x86 if needed.
+ if (SelOpNo && VT != CBO.getValueType())
+ return SDValue();
+
// We have a select-of-constants followed by a binary operator with a
// constant. Eliminate the binop by pulling the constant math into the select.
- // Example: add (select Cond, CT, CF), C1 --> select Cond, CT + C1, CF + C1
- EVT VT = Sel.getValueType();
+ // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
SDLoc DL(Sel);
- SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1);
- if (!NewCT.isUndef() &&
+ SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
+ : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
+ if (!CanFoldNonConst && !NewCT.isUndef() &&
!isConstantOrConstantVector(NewCT, true) &&
!isConstantFPBuildVectorOrConstantFP(NewCT))
return SDValue();
- SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1);
- if (!NewCF.isUndef() &&
+ SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
+ : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
+ if (!CanFoldNonConst && !NewCF.isUndef() &&
!isConstantOrConstantVector(NewCF, true) &&
!isConstantFPBuildVectorOrConstantFP(NewCF))
return SDValue();
@@ -1936,6 +1958,84 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
}
+static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
+ assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
+ "Expecting add or sub");
+
+ // Match a constant operand and a zext operand for the math instruction:
+ // add Z, C
+ // sub C, Z
+ bool IsAdd = N->getOpcode() == ISD::ADD;
+ SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
+ SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
+ auto *CN = dyn_cast<ConstantSDNode>(C);
+ if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
+ return SDValue();
+
+ // Match the zext operand as a setcc of a boolean.
+ if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
+ Z.getOperand(0).getValueType() != MVT::i1)
+ return SDValue();
+
+ // Match the compare as: setcc (X & 1), 0, eq.
+ SDValue SetCC = Z.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
+ if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
+ SetCC.getOperand(0).getOpcode() != ISD::AND ||
+ !isOneConstant(SetCC.getOperand(0).getOperand(1)))
+ return SDValue();
+
+ // We are adding/subtracting a constant and an inverted low bit. Turn that
+ // into a subtract/add of the low bit with incremented/decremented constant:
+ // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
+ // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
+ EVT VT = C.getValueType();
+ SDLoc DL(N);
+ SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
+ SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
+ DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
+ return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
+}
+
+/// Try to fold an add/sub with a constant operand, where the other operand is
+/// a shifted 'not' of the sign bit, into a shift and an add with a different
+/// constant.
+static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
+ assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
+ "Expecting add or sub");
+
+ // We need a constant operand for the add/sub, and the other operand is a
+ // logical shift right: add (srl), C or sub C, (srl).
+ bool IsAdd = N->getOpcode() == ISD::ADD;
+ SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
+ SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
+ ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
+ if (!C || ShiftOp.getOpcode() != ISD::SRL)
+ return SDValue();
+
+ // The shift must be of a 'not' value.
+ // TODO: Use isBitwiseNot() if it works with vectors.
+ SDValue Not = ShiftOp.getOperand(0);
+ if (!Not.hasOneUse() || Not.getOpcode() != ISD::XOR ||
+ !isAllOnesConstantOrAllOnesSplatConstant(Not.getOperand(1)))
+ return SDValue();
+
+ // The shift must be moving the sign bit to the least-significant-bit.
+ EVT VT = ShiftOp.getValueType();
+ SDValue ShAmt = ShiftOp.getOperand(1);
+ ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
+ if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1)
+ return SDValue();
+
+ // Eliminate the 'not' by adjusting the shift and add/sub constant:
+ // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
+ // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
+ SDLoc DL(N);
+ auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
+ SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
+ APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
+ return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
+}
+
SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -2067,6 +2167,12 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
}
+ if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
+ return V;
+
+ if (SDValue V = foldAddSubOfSignBit(N, DAG))
+ return V;
+
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
@@ -2075,6 +2181,11 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
DAG.haveNoCommonBitsSet(N0, N1))
return DAG.getNode(ISD::OR, DL, VT, N0, N1);
+ // fold (add (xor a, -1), 1) -> (sub 0, a)
+ if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1))
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ N0.getOperand(0));
+
if (SDValue Combined = visitADDLike(N0, N1, N))
return Combined;
@@ -2210,6 +2321,38 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
return SDValue();
}
+static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT,
+ SelectionDAG &DAG, const TargetLowering &TLI) {
+ SDValue Cst;
+ switch (TLI.getBooleanContents(VT)) {
+ case TargetLowering::ZeroOrOneBooleanContent:
+ case TargetLowering::UndefinedBooleanContent:
+ Cst = DAG.getConstant(1, DL, VT);
+ break;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ Cst = DAG.getConstant(-1, DL, VT);
+ break;
+ }
+
+ return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
+}
+
+static bool isBooleanFlip(SDValue V, EVT VT, const TargetLowering &TLI) {
+ if (V.getOpcode() != ISD::XOR) return false;
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V.getOperand(1));
+ if (!Const) return false;
+
+ switch (TLI.getBooleanContents(VT)) {
+ case TargetLowering::ZeroOrOneBooleanContent:
+ return Const->isOne();
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ return Const->isAllOnesValue();
+ case TargetLowering::UndefinedBooleanContent:
+ return (Const->getAPIntValue() & 0x01) == 1;
+ }
+ llvm_unreachable("Unsupported boolean content");
+}
+
SDValue DAGCombiner::visitUADDO(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -2240,6 +2383,15 @@ SDValue DAGCombiner::visitUADDO(SDNode *N) {
return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
DAG.getConstant(0, DL, CarryVT));
+ // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
+ if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1)) {
+ SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
+ DAG.getConstant(0, DL, VT),
+ N0.getOperand(0));
+ return CombineTo(N, Sub,
+ flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
+ }
+
if (SDValue Combined = visitUADDOLike(N0, N1, N))
return Combined;
@@ -2303,13 +2455,17 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
// fold (addcarry x, y, false) -> (uaddo x, y)
- if (isNullConstant(CarryIn))
- return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
+ if (isNullConstant(CarryIn)) {
+ if (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
+ return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
+ }
+
+ EVT CarryVT = CarryIn.getValueType();
// fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
if (isNullConstant(N0) && isNullConstant(N1)) {
EVT VT = N0.getValueType();
- EVT CarryVT = CarryIn.getValueType();
SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
AddToWorklist(CarryExt.getNode());
return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
@@ -2317,6 +2473,16 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
DAG.getConstant(0, DL, CarryVT));
}
+ // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
+ if (isBitwiseNot(N0) && isNullConstant(N1) &&
+ isBooleanFlip(CarryIn, CarryVT, TLI)) {
+ SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
+ DAG.getConstant(0, DL, N0.getValueType()),
+ N0.getOperand(0), CarryIn.getOperand(0));
+ return CombineTo(N, Sub,
+ flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
+ }
+
if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
return Combined;
@@ -2458,6 +2624,11 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (isAllOnesConstantOrAllOnesSplatConstant(N0))
return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
+ // fold (A - (0-B)) -> A+B
+ if (N1.getOpcode() == ISD::SUB &&
+ isNullConstantOrNullSplatConstant(N1.getOperand(0)))
+ return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
+
// fold A-(A-B) -> B
if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
return N1.getOperand(1);
@@ -2500,12 +2671,50 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
N0.getOperand(1).getOperand(0));
+ // fold (X - (-Y * Z)) -> (X + (Y * Z))
+ if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
+ if (N1.getOperand(0).getOpcode() == ISD::SUB &&
+ isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0))) {
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
+ N1.getOperand(0).getOperand(1),
+ N1.getOperand(1));
+ return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
+ }
+ if (N1.getOperand(1).getOpcode() == ISD::SUB &&
+ isNullConstantOrNullSplatConstant(N1.getOperand(1).getOperand(0))) {
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
+ N1.getOperand(0),
+ N1.getOperand(1).getOperand(1));
+ return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
+ }
+ }
+
// If either operand of a sub is undef, the result is undef
if (N0.isUndef())
return N0;
if (N1.isUndef())
return N1;
+ if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
+ return V;
+
+ if (SDValue V = foldAddSubOfSignBit(N, DAG))
+ return V;
+
+ // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
+ if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
+ if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
+ SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
+ SDValue S0 = N1.getOperand(0);
+ if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
+ unsigned OpSizeInBits = VT.getScalarSizeInBits();
+ if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
+ if (C->getAPIntValue() == (OpSizeInBits - 1))
+ return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
+ }
+ }
+ }
+
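The fold above uses the classic branch-free absolute value: with Y the sign splat of X, (X ^ Y) - Y equals |X|. A standalone check, assuming 32-bit two's complement and an arithmetic right shift on int32_t:

#include <cassert>
#include <cstdint>
#include <cstdlib>

int main() {
  const int32_t vals[] = {0, 1, -1, 42, -42, INT32_MAX, INT32_MIN + 1};
  for (int32_t x : vals) {
    int32_t y = x >> 31;              // sra(X, size(X)-1): 0 or -1
    int32_t viaShift = (x ^ y) - y;   // sub(xor(X, Y), Y)
    assert(viaShift == std::abs(x));
  }
}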
// If the relocation model supports it, consider symbol offsets.
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
@@ -2612,8 +2821,11 @@ SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
SDValue CarryIn = N->getOperand(2);
// fold (subcarry x, y, false) -> (usubo x, y)
- if (isNullConstant(CarryIn))
- return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
+ if (isNullConstant(CarryIn)) {
+ if (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
+ return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
+ }
return SDValue();
}
@@ -2689,11 +2901,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
(!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
SDLoc DL(N);
SDValue LogBase2 = BuildLogBase2(N1, DL);
- AddToWorklist(LogBase2.getNode());
-
EVT ShiftVT = getShiftAmountTy(N0.getValueType());
SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
- AddToWorklist(Trunc.getNode());
return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
}
// fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
@@ -2816,9 +3025,10 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) {
SDValue Op1 = Node->getOperand(1);
SDValue combined;
for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
- UE = Op0.getNode()->use_end(); UI != UE;) {
- SDNode *User = *UI++;
- if (User == Node || User->use_empty())
+ UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
+ User->use_empty())
continue;
// Convert the other matching node(s), too;
// otherwise, the DIVREM may get target-legalized into something
@@ -2868,6 +3078,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
+ EVT CCVT = getSetCCResultType(VT);
// fold vector ops
if (VT.isVector())
@@ -2887,6 +3098,11 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
// fold (sdiv X, -1) -> 0-X
if (N1C && N1C->isAllOnesValue())
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
+ // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
+ if (N1C && N1C->getAPIntValue().isMinSignedValue())
+ return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
+ DAG.getConstant(1, DL, VT),
+ DAG.getConstant(0, DL, VT));
if (SDValue V = simplifyDivRem(N, DAG))
return V;
@@ -2899,45 +3115,90 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
+ if (SDValue V = visitSDIVLike(N0, N1, N))
+ return V;
+
+ // sdiv, srem -> sdivrem
+ // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
+ // true. Otherwise, we break the simplification logic in visitREM().
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+ if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (SDValue DivRem = useDivRem(N))
+ return DivRem;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ EVT CCVT = getSetCCResultType(VT);
+ unsigned BitWidth = VT.getScalarSizeInBits();
+
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+
+ // Helper for determining whether a value is a power-of-2 constant scalar or a
+ // vector of such elements.
+ auto IsPowerOfTwo = [](ConstantSDNode *C) {
+ if (C->isNullValue() || C->isOpaque())
+ return false;
+ if (C->getAPIntValue().isPowerOf2())
+ return true;
+ if ((-C->getAPIntValue()).isPowerOf2())
+ return true;
+ return false;
+ };
+
// fold (sdiv X, pow2) -> simple ops after legalize
// FIXME: We check for the exact bit here because the generic lowering gives
// better results in that case. The target-specific lowering should learn how
// to handle exact sdivs efficiently.
- if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
- !N->getFlags().hasExact() && (N1C->getAPIntValue().isPowerOf2() ||
- (-N1C->getAPIntValue()).isPowerOf2())) {
+ if (!N->getFlags().hasExact() &&
+ ISD::matchUnaryPredicate(N1C ? SDValue(N1C, 0) : N1, IsPowerOfTwo)) {
// Target-specific implementation of sdiv x, pow2.
if (SDValue Res = BuildSDIVPow2(N))
return Res;
- unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
+ // Create constants that are functions of the shift amount value.
+ EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
+ SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
+ SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
+ C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
+ SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
+ if (!isConstantOrConstantVector(Inexact))
+ return SDValue();
// Splat the sign bit into the register
- SDValue SGN =
- DAG.getNode(ISD::SRA, DL, VT, N0,
- DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
- getShiftAmountTy(N0.getValueType())));
- AddToWorklist(SGN.getNode());
+ SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
+ DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
+ AddToWorklist(Sign.getNode());
// Add (N0 < 0) ? abs2 - 1 : 0;
- SDValue SRL =
- DAG.getNode(ISD::SRL, DL, VT, SGN,
- DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL,
- getShiftAmountTy(SGN.getValueType())));
- SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL);
- AddToWorklist(SRL.getNode());
- AddToWorklist(ADD.getNode()); // Divide by pow2
- SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD,
- DAG.getConstant(lg2, DL,
- getShiftAmountTy(ADD.getValueType())));
-
- // If we're dividing by a positive value, we're done. Otherwise, we must
- // negate the result.
- if (N1C->getAPIntValue().isNonNegative())
- return SRA;
-
- AddToWorklist(SRA.getNode());
- return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
+ SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
+ AddToWorklist(Srl.getNode());
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
+ AddToWorklist(Add.getNode());
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
+ AddToWorklist(Sra.getNode());
+
+ // Special case: (sdiv X, 1) -> X
+ // Special Case: (sdiv X, -1) -> 0-X
+ SDValue One = DAG.getConstant(1, DL, VT);
+ SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
+ SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
+ SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
+ SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
+ Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
+
+ // If dividing by a positive value, we're done. Otherwise, the result must
+ // be negated.
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
+
+ // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
+ SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
+ SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
+ return Res;
}
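The sequence built above is the usual shift-based signed division by a power of two: add a bias of 2^k - 1 for negative dividends, then shift arithmetically. A standalone sketch, assuming 32-bit two's complement, arithmetic right shifts on int32_t, and k in [1, 31]:

#include <cassert>
#include <cstdint>

int32_t sdivPow2(int32_t x, unsigned k) {     // divide by 2^k
  int32_t sign = x >> 31;                     // splat the sign bit
  uint32_t bias = uint32_t(sign) >> (32 - k); // 2^k - 1 if x < 0, else 0
  return int32_t(uint32_t(x) + bias) >> k;    // sra now rounds toward zero
}

int main() {
  const int32_t xs[] = {37, -37, 64, -64, 0, -1, 123456, -123456};
  const unsigned ks[] = {1, 2, 5};
  for (int32_t x : xs)
    for (unsigned k : ks)
      assert(sdivPow2(x, k) == x / (int32_t(1) << k));
}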
// If integer divide is expensive and we satisfy the requirements, emit an
@@ -2948,13 +3209,6 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
if (SDValue Op = BuildSDIV(N))
return Op;
- // sdiv, srem -> sdivrem
- // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
- // true. Otherwise, we break the simplification logic in visitREM().
- if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
- if (SDValue DivRem = useDivRem(N))
- return DivRem;
-
return SDValue();
}
@@ -2962,6 +3216,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
+ EVT CCVT = getSetCCResultType(VT);
// fold vector ops
if (VT.isVector())
@@ -2977,6 +3232,14 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
N0C, N1C))
return Folded;
+ // fold (udiv X, 1) -> X
+ if (N1C && N1C->isOne())
+ return N0;
+ // fold (udiv X, -1) -> select(X == -1, 1, 0)
+ if (N1C && N1C->getAPIntValue().isAllOnesValue())
+ return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
+ DAG.getConstant(1, DL, VT),
+ DAG.getConstant(0, DL, VT));
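Dividing by the all-ones (unsigned maximum) value can only yield 0 or 1, which is exactly what the select encodes. A standalone check, assuming 32-bit unsigned arithmetic:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t xs[] = {0, 1, 12345, 0xFFFFFFFEu, 0xFFFFFFFFu};
  for (uint32_t x : xs) {
    uint32_t quotient = x / 0xFFFFFFFFu;            // udiv X, -1
    uint32_t folded = (x == 0xFFFFFFFFu) ? 1u : 0u; // select(X == -1, 1, 0)
    assert(quotient == folded);
  }
}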
if (SDValue V = simplifyDivRem(N, DAG))
return V;
@@ -2984,6 +3247,26 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
+ if (SDValue V = visitUDIVLike(N0, N1, N))
+ return V;
+
+ // udiv, urem -> udivrem
+ // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
+ // true. Otherwise, we break the simplification logic in visitREM().
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+ if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (SDValue DivRem = useDivRem(N))
+ return DivRem;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+
// fold (udiv x, (1 << c)) -> x >>u c
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
DAG.isKnownToBeAPowerOfTwo(N1)) {
@@ -3019,13 +3302,6 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
if (SDValue Op = BuildUDIV(N))
return Op;
- // sdiv, srem -> sdivrem
- // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
- // true. Otherwise, we break the simplification logic in visitREM().
- if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
- if (SDValue DivRem = useDivRem(N))
- return DivRem;
-
return SDValue();
}
@@ -3035,6 +3311,8 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
+ EVT CCVT = getSetCCResultType(VT);
+
bool isSigned = (Opcode == ISD::SREM);
SDLoc DL(N);
@@ -3044,6 +3322,10 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
if (N0C && N1C)
if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
return Folded;
+ // fold (urem X, -1) -> select(X == -1, 0, X)
+ if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
+ return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
+ DAG.getConstant(0, DL, VT), N0);
if (SDValue V = simplifyDivRem(N, DAG))
return V;
@@ -3077,22 +3359,19 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
// If X/C can be simplified by the division-by-constant logic, lower
// X%C to the equivalent of X-X/C*C.
- // To avoid mangling nodes, this simplification requires that the combine()
- // call for the speculative DIV must not cause a DIVREM conversion. We guard
- // against this by skipping the simplification if isIntDivCheap(). When
- // div is not cheap, combine will not return a DIVREM. Regardless,
- // checking cheapness here makes sense since the simplification results in
- // fatter code.
- if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
- unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
- SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1);
- AddToWorklist(Div.getNode());
- SDValue OptimizedDiv = combine(Div.getNode());
- if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
- assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) &&
- (OptimizedDiv.getOpcode() != ISD::SDIVREM));
+ // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
+ // speculative DIV must not cause a DIVREM conversion. We guard against this
+ // by skipping the simplification if isIntDivCheap(). When div is not cheap,
+ // combine will not return a DIVREM. Regardless, checking cheapness here
+ // makes sense since the simplification results in fatter code.
+ if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
+ SDValue OptimizedDiv =
+ isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
+ if (OptimizedDiv.getNode() && OptimizedDiv.getOpcode() != ISD::UDIVREM &&
+ OptimizedDiv.getOpcode() != ISD::SDIVREM) {
SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
+ AddToWorklist(OptimizedDiv.getNode());
AddToWorklist(Mul.getNode());
return Sub;
}
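The rewrite above expresses the remainder through the simplified division: x % c == x - (x / c) * c. A standalone check for non-zero divisors, on 32-bit signed and unsigned values:

#include <cassert>
#include <cstdint>

int main() {
  const int32_t xs[] = {37, -37, 0, 123456, -7};
  const int32_t cs[] = {1, 2, -3, 10};
  for (int32_t x : xs)
    for (int32_t c : cs)
      assert(x % c == x - (x / c) * c);   // srem via sdiv
  const uint32_t uxs[] = {0, 37, 0xFFFFFFFFu};
  const uint32_t ucs[] = {1, 2, 10, 0x80000000u};
  for (uint32_t x : uxs)
    for (uint32_t c : ucs)
      assert(x % c == x - (x / c) * c);   // urem via udiv
}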
@@ -3350,6 +3629,25 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
+ // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
+ // Only do this if the current op isn't legal and the flipped is.
+ unsigned Opcode = N->getOpcode();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isOperationLegal(Opcode, VT) &&
+ (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
+ (N1.isUndef() || DAG.SignBitIsZero(N1))) {
+ unsigned AltOpcode;
+ switch (Opcode) {
+ case ISD::SMIN: AltOpcode = ISD::UMIN; break;
+ case ISD::SMAX: AltOpcode = ISD::UMAX; break;
+ case ISD::UMIN: AltOpcode = ISD::SMIN; break;
+ case ISD::UMAX: AltOpcode = ISD::SMAX; break;
+ default: llvm_unreachable("Unknown MINMAX opcode");
+ }
+ if (TLI.isOperationLegal(AltOpcode, VT))
+ return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
+ }
+
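The flip above is safe because, when the sign bit of both operands is known zero, signed and unsigned min/max agree. A standalone check, assuming 32-bit values whose sign bit is clear:

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  // Every value here has a clear sign bit (<= 0x7FFFFFFF).
  const uint32_t as[] = {0, 1, 42, 0x7FFFFFFFu};
  const uint32_t bs[] = {0, 5, 0x12345678u, 0x7FFFFFFFu};
  for (uint32_t a : as)
    for (uint32_t b : bs) {
      assert(std::min(a, b) == uint32_t(std::min(int32_t(a), int32_t(b))));
      assert(std::max(a, b) == uint32_t(std::max(int32_t(a), int32_t(b))));
    }
}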
return SDValue();
}
@@ -3469,9 +3767,9 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
ShOp = SDValue();
}
- // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
- // (OR (shuf (A, C), shuf (B, C)) -> shuf (OR (A, B), C)
- // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
+ // (AND (shuf (A, C), shuf (B, C))) -> shuf (AND (A, B), C)
+ // (OR (shuf (A, C), shuf (B, C))) -> shuf (OR (A, B), C)
+ // (XOR (shuf (A, C), shuf (B, C))) -> shuf (XOR (A, B), V_0)
if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
N0->getOperand(0), N1->getOperand(0));
@@ -3490,9 +3788,9 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
ShOp = SDValue();
}
- // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
- // (OR (shuf (C, A), shuf (C, B)) -> shuf (C, OR (A, B))
- // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
+ // (AND (shuf (C, A), shuf (C, B))) -> shuf (C, AND (A, B))
+ // (OR (shuf (C, A), shuf (C, B))) -> shuf (C, OR (A, B))
+ // (XOR (shuf (C, A), shuf (C, B))) -> shuf (V_0, XOR (A, B))
if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
N0->getOperand(1), N1->getOperand(1));
@@ -3525,7 +3823,7 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
// operations on the left and right operands, so those types must match.
EVT VT = N0.getValueType();
EVT OpVT = LL.getValueType();
- if (LegalOperations || VT != MVT::i1)
+ if (LegalOperations || VT.getScalarType() != MVT::i1)
if (VT != getSetCCResultType(OpVT))
return SDValue();
if (OpVT != RL.getValueType())
@@ -3762,53 +4060,78 @@ bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
return true;
}
-bool DAGCombiner::isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType,
- EVT &ExtVT, unsigned ShAmt) {
- // Don't transform one with multiple uses, this would require adding a new
- // load.
- if (!SDValue(LoadN, 0).hasOneUse())
+bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
+ ISD::LoadExtType ExtType, EVT &MemVT,
+ unsigned ShAmt) {
+ if (!LDST)
return false;
-
- if (LegalOperations &&
- !TLI.isLoadExtLegal(ExtType, LoadN->getValueType(0), ExtVT))
+ // Only allow byte offsets.
+ if (ShAmt % 8)
return false;
// Do not generate loads of non-round integer types since these can
// be expensive (and would be wrong if the type is not byte sized).
- if (!ExtVT.isRound())
+ if (!MemVT.isRound())
return false;
// Don't change the width of a volatile load.
- if (LoadN->isVolatile())
+ if (LDST->isVolatile())
return false;
// Verify that we are actually reducing a load width here.
- if (LoadN->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits())
+ if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
return false;
- // For the transform to be legal, the load must produce only two values
- // (the value loaded and the chain). Don't transform a pre-increment
- // load, for example, which produces an extra value. Otherwise the
- // transformation is not equivalent, and the downstream logic to replace
- // uses gets things wrong.
- if (LoadN->getNumValues() > 2)
- return false;
-
- // If the load that we're shrinking is an extload and we're not just
- // discarding the extension we can't simply shrink the load. Bail.
- // TODO: It would be possible to merge the extensions in some cases.
- if (LoadN->getExtensionType() != ISD::NON_EXTLOAD &&
- LoadN->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
- return false;
-
- if (!TLI.shouldReduceLoadWidth(LoadN, ExtType, ExtVT))
+ // Ensure that this isn't going to produce an unsupported unaligned access.
+ if (ShAmt &&
+ !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
+ LDST->getAddressSpace(), ShAmt / 8))
return false;
// It's not possible to generate a constant of extended or untyped type.
- EVT PtrType = LoadN->getOperand(1).getValueType();
+ EVT PtrType = LDST->getBasePtr().getValueType();
if (PtrType == MVT::Untyped || PtrType.isExtended())
return false;
+ if (isa<LoadSDNode>(LDST)) {
+ LoadSDNode *Load = cast<LoadSDNode>(LDST);
+ // Don't transform one with multiple uses, this would require adding a new
+ // load.
+ if (!SDValue(Load, 0).hasOneUse())
+ return false;
+
+ if (LegalOperations &&
+ !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
+ return false;
+
+ // For the transform to be legal, the load must produce only two values
+ // (the value loaded and the chain). Don't transform a pre-increment
+ // load, for example, which produces an extra value. Otherwise the
+ // transformation is not equivalent, and the downstream logic to replace
+ // uses gets things wrong.
+ if (Load->getNumValues() > 2)
+ return false;
+
+ // If the load that we're shrinking is an extload and we're not just
+ // discarding the extension we can't simply shrink the load. Bail.
+ // TODO: It would be possible to merge the extensions in some cases.
+ if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
+ Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
+ return false;
+
+ if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
+ return false;
+ } else {
+ assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
+ StoreSDNode *Store = cast<StoreSDNode>(LDST);
+ // Can't write outside the original store
+ if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
+ return false;
+
+ if (LegalOperations &&
+ !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
+ return false;
+ }
return true;
}
@@ -3841,7 +4164,7 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
auto *Load = cast<LoadSDNode>(Op);
EVT ExtVT;
if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
- isLegalNarrowLoad(Load, ISD::ZEXTLOAD, ExtVT)) {
+ isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
// ZEXTLOAD is already small enough.
if (Load->getExtensionType() == ISD::ZEXTLOAD &&
@@ -3882,7 +4205,23 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
// Allow one node which will masked along with any loads found.
if (NodeToMask)
return false;
+
+ // Also ensure that the node to be masked only produces one data result.
NodeToMask = Op.getNode();
+ if (NodeToMask->getNumValues() > 1) {
+ bool HasValue = false;
+ for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
+ MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
+ if (VT != MVT::Glue && VT != MVT::Other) {
+ if (HasValue) {
+ NodeToMask = nullptr;
+ return false;
+ }
+ HasValue = true;
+ }
+ }
+ assert(HasValue && "Node to be masked has no data result?");
+ }
}
return true;
}
@@ -3906,19 +4245,19 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
if (Loads.size() == 0)
return false;
- DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
+ LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
SDValue MaskOp = N->getOperand(1);
// If it exists, fixup the single node we allow in the tree that needs
// masking.
if (FixupNode) {
- DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
+ LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
FixupNode->getValueType(0),
SDValue(FixupNode, 0), MaskOp);
DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
- DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0),
- MaskOp);
+ if (And.getOpcode() == ISD::AND)
+ DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
}
// Narrow any constants that need it.
@@ -3937,11 +4276,13 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
// Create narrow loads.
for (auto *Load : Loads) {
- DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
+ LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
SDValue(Load, 0), MaskOp);
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
- DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp);
+ if (And.getOpcode() == ISD::AND)
+ And = SDValue(
+ DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
SDValue NewLoad = ReduceLoadWidth(And.getNode());
assert(NewLoad &&
"Shouldn't be masking the load if it can't be narrowed");
@@ -3953,6 +4294,60 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
return false;
}
+// Unfold
+// x & (-1 'logical shift' y)
+// To
+// (x 'opposite logical shift' y) 'logical shift' y
+// if it is better for performance.
+SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
+ assert(N->getOpcode() == ISD::AND);
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // Do we actually prefer shifts over mask?
+ if (!TLI.preferShiftsToClearExtremeBits(N0))
+ return SDValue();
+
+ // Try to match (-1 '[outer] logical shift' y)
+ unsigned OuterShift;
+ unsigned InnerShift; // The opposite direction to the OuterShift.
+ SDValue Y; // Shift amount.
+ auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
+ if (!M.hasOneUse())
+ return false;
+ OuterShift = M->getOpcode();
+ if (OuterShift == ISD::SHL)
+ InnerShift = ISD::SRL;
+ else if (OuterShift == ISD::SRL)
+ InnerShift = ISD::SHL;
+ else
+ return false;
+ if (!isAllOnesConstant(M->getOperand(0)))
+ return false;
+ Y = M->getOperand(1);
+ return true;
+ };
+
+ SDValue X;
+ if (matchMask(N1))
+ X = N0;
+ else if (matchMask(N0))
+ X = N1;
+ else
+ return SDValue();
+
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ // tmp = x 'opposite logical shift' y
+ SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
+ // ret = tmp 'logical shift' y
+ SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
+
+ return T1;
+}
+
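The unfold above replaces masking with an all-ones value shifted by y with two opposite shifts by y. A standalone check, assuming 32-bit unsigned (logical) shifts and y in [0, 31]:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t xs[] = {0, 0xDEADBEEFu, 0x80000001u};
  const uint32_t ys[] = {0, 1, 7, 31};
  const uint32_t ones = 0xFFFFFFFFu;
  for (uint32_t x : xs)
    for (uint32_t y : ys) {
      assert((x & (ones << y)) == ((x >> y) << y)); // clear the low y bits
      assert((x & (ones >> y)) == ((x << y) >> y)); // clear the high y bits
    }
}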
SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -4019,7 +4414,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
};
if (N0.getOpcode() == ISD::OR &&
- matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
+ ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
return N1;
// fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
@@ -4250,6 +4645,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return BSwap;
}
+ if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
+ return Shifts;
+
return SDValue();
}
@@ -4276,7 +4674,10 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
if (!N0.getNode()->hasOneUse())
return SDValue();
ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
- if (!N01C || N01C->getZExtValue() != 0xFF00)
+ // Also handle 0xffff since the LHS is guaranteed to have zeros there.
+ // This is needed for X86.
+ if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
+ N01C->getZExtValue() != 0xFFFF))
return SDValue();
N0 = N0.getOperand(0);
LookPassAnd0 = true;
@@ -4323,7 +4724,10 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
if (!N10.getNode()->hasOneUse())
return SDValue();
ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
- if (!N101C || N101C->getZExtValue() != 0xFF00)
+ // Also allow 0xFFFF since the bits will be shifted out. This is needed
+ // for X86.
+ if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
+ N101C->getZExtValue() != 0xFFFF))
return SDValue();
N10 = N10.getOperand(0);
LookPassAnd1 = true;
@@ -4394,6 +4798,14 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
return false;
case 0xFF: MaskByteOffset = 0; break;
case 0xFF00: MaskByteOffset = 1; break;
+ case 0xFFFF:
+ // In case demanded bits didn't clear the bits that will be shifted out.
+ // This is needed for X86.
+ if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
+ MaskByteOffset = 1;
+ break;
+ }
+ return false;
case 0xFF0000: MaskByteOffset = 2; break;
case 0xFF000000: MaskByteOffset = 3; break;
}
@@ -4708,7 +5120,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return LHS->getAPIntValue().intersects(RHS->getAPIntValue());
};
if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
- matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect)) {
+ ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect)) {
if (SDValue COR = DAG.FoldConstantArithmetic(
ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
@@ -4764,7 +5176,8 @@ bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
// in direction shift1 by Neg. The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
-static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
+static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
+ SelectionDAG &DAG) {
// If EltSize is a power of 2 then:
//
// (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
@@ -4799,9 +5212,13 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
unsigned MaskLoBits = 0;
if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
- if (NegC->getAPIntValue() == EltSize - 1) {
+ KnownBits Known;
+ DAG.computeKnownBits(Neg.getOperand(0), Known);
+ unsigned Bits = Log2_64(EltSize);
+ if (NegC->getAPIntValue().getActiveBits() <= Bits &&
+ ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
Neg = Neg.getOperand(0);
- MaskLoBits = Log2_64(EltSize);
+ MaskLoBits = Bits;
}
}
}
@@ -4816,10 +5233,16 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
// On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
// Pos'. The truncation is redundant for the purpose of the equality.
- if (MaskLoBits && Pos.getOpcode() == ISD::AND)
- if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
- if (PosC->getAPIntValue() == EltSize - 1)
+ if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
+ if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
+ KnownBits Known;
+ DAG.computeKnownBits(Pos.getOperand(0), Known);
+ if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
+ ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
+ MaskLoBits))
Pos = Pos.getOperand(0);
+ }
+ }
// The condition we need is now:
//
@@ -4875,7 +5298,7 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
// (srl x, (*ext y))) ->
// (rotr x, y) or (rotl x, (sub 32, y))
EVT VT = Shifted.getValueType();
- if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) {
+ if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
HasPos ? Pos : Neg).getNode();
@@ -4893,8 +5316,8 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
if (!TLI.isTypeLegal(VT)) return nullptr;
// The target must have at least one rotate flavor.
- bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
- bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
+ bool HasROTL = hasOperation(ISD::ROTL, VT);
+ bool HasROTR = hasOperation(ISD::ROTR, VT);
if (!HasROTL && !HasROTR) return nullptr;
// Check for truncated rotate.
@@ -4943,7 +5366,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
ConstantSDNode *RHS) {
return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
};
- if (matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
+ if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
@@ -5200,7 +5623,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
Optional<BaseIndexOffset> Base;
SDValue Chain;
- SmallSet<LoadSDNode *, 8> Loads;
+ SmallPtrSet<LoadSDNode *, 8> Loads;
Optional<ByteProvider> FirstByteProvider;
int64_t FirstOffset = INT64_MAX;
@@ -5299,6 +5722,88 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
}
+// If the target has andn, bsl, or a similar bit-select instruction,
+// we want to unfold masked merge, with canonical pattern of:
+// | A | |B|
+// ((x ^ y) & m) ^ y
+// | D |
+// Into:
+// (x & m) | (y & ~m)
+// If y is a constant, and the 'andn' does not work with immediates,
+// we unfold into a different pattern:
+// ~(~x & m) & (m | y)
+// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
+// the very least that breaks andnpd / andnps patterns, and because those
+// patterns are simplified in IR and shouldn't be created in the DAG
+SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
+ assert(N->getOpcode() == ISD::XOR);
+
+ // Don't touch 'not' (i.e. where y = -1).
+ if (isAllOnesConstantOrAllOnesSplatConstant(N->getOperand(1)))
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+
+ // There are 3 commutable operators in the pattern,
+ // so we have to deal with 8 possible variants of the basic pattern.
+ SDValue X, Y, M;
+ auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
+ if (And.getOpcode() != ISD::AND || !And.hasOneUse())
+ return false;
+ SDValue Xor = And.getOperand(XorIdx);
+ if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
+ return false;
+ SDValue Xor0 = Xor.getOperand(0);
+ SDValue Xor1 = Xor.getOperand(1);
+ // Don't touch 'not' (i.e. where y = -1).
+ if (isAllOnesConstantOrAllOnesSplatConstant(Xor1))
+ return false;
+ if (Other == Xor0)
+ std::swap(Xor0, Xor1);
+ if (Other != Xor1)
+ return false;
+ X = Xor0;
+ Y = Xor1;
+ M = And.getOperand(XorIdx ? 0 : 1);
+ return true;
+ };
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
+ !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
+ return SDValue();
+
+ // Don't do anything if the mask is constant. This should not be reachable.
+ // InstCombine should have already unfolded this pattern, and DAGCombiner
+ // probably shouldn't produce it, too.
+ if (isa<ConstantSDNode>(M.getNode()))
+ return SDValue();
+
+ // We can transform if the target has AndNot
+ if (!TLI.hasAndNot(M))
+ return SDValue();
+
+ SDLoc DL(N);
+
+ // If Y is a constant, check that 'andn' works with immediates.
+ if (!TLI.hasAndNot(Y)) {
+ assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
+ // If not, we need to do a bit more work to make sure andn is still used.
+ SDValue NotX = DAG.getNOT(DL, X, VT);
+ SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
+ SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
+ SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
+ return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
+ }
+
+ SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
+ SDValue NotM = DAG.getNOT(DL, M, VT);
+ SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
+
+ return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
+}
+
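The masked-merge identity being unfolded above: ((x ^ y) & m) ^ y picks the bits of x where m is set and the bits of y where m is clear, the same as (x & m) | (y & ~m). A standalone check on 32-bit unsigned values:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t xs[] = {0, 0xAAAAAAAAu, 0x12345678u};
  const uint32_t ys[] = {0, 0x55555555u, 0xFFFFFFFFu};
  const uint32_t ms[] = {0, 0x0000FFFFu, 0xF0F0F0F0u};
  for (uint32_t x : xs)
    for (uint32_t y : ys)
      for (uint32_t m : ms) {
        uint32_t folded = ((x ^ y) & m) ^ y;     // canonical masked merge
        uint32_t unfolded = (x & m) | (y & ~m);  // andn-friendly form
        assert(folded == unfolded);
      }
}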
SDValue DAGCombiner::visitXOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -5378,7 +5883,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
}
// fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
- if (isOneConstant(N1) && VT == MVT::i1 &&
+ if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
(N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
@@ -5390,7 +5895,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
}
}
// fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
- if (isAllOnesConstant(N1) &&
+ if (isAllOnesConstant(N1) && N0.hasOneUse() &&
(N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
@@ -5411,13 +5916,19 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
}
// fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
- unsigned OpSizeInBits = VT.getScalarSizeInBits();
- if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
- N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0) &&
- TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
- if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
- if (C->getAPIntValue() == (OpSizeInBits - 1))
- return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0.getOperand(0));
+ if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
+ SDValue A = N0.getOpcode() == ISD::ADD ? N0 : N1;
+ SDValue S = N0.getOpcode() == ISD::SRA ? N0 : N1;
+ if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
+ SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
+ SDValue S0 = S.getOperand(0);
+ if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
+ unsigned OpSizeInBits = VT.getScalarSizeInBits();
+ if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
+ if (C->getAPIntValue() == (OpSizeInBits - 1))
+ return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
+ }
+ }
}
// fold (xor x, x) -> 0
@@ -5454,6 +5965,10 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
return Tmp;
+ // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
+ if (SDValue MM = unfoldMaskedMerge(N))
+ return MM;
+
// Simplify the expression using non-local knowledge.
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
@@ -5656,7 +6171,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
return Val->getAPIntValue().uge(OpSizeInBits);
};
- if (matchUnaryPredicate(N1, MatchShiftTooBig))
+ if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
return DAG.getUNDEF(VT);
// fold (shl x, 0) -> x
if (N1C && N1C->isNullValue())
@@ -5691,7 +6206,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
return (c1 + c2).uge(OpSizeInBits);
};
- if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
return DAG.getConstant(0, SDLoc(N), VT);
auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
@@ -5701,7 +6216,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
return (c1 + c2).ult(OpSizeInBits);
};
- if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
SDLoc DL(N);
EVT ShiftVT = N1.getValueType();
SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
@@ -5877,7 +6392,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
return Val->getAPIntValue().uge(OpSizeInBits);
};
- if (matchUnaryPredicate(N1, MatchShiftTooBig))
+ if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
return DAG.getUNDEF(VT);
// fold (sra x, 0) -> x
if (N1C && N1C->isNullValue())
@@ -5912,7 +6427,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
return (c1 + c2).uge(OpSizeInBits);
};
- if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
DAG.getConstant(OpSizeInBits - 1, DL, ShiftVT));
@@ -5923,7 +6438,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
return (c1 + c2).ult(OpSizeInBits);
};
- if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), Sum);
}
@@ -6041,7 +6556,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
return Val->getAPIntValue().uge(OpSizeInBits);
};
- if (matchUnaryPredicate(N1, MatchShiftTooBig))
+ if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
return DAG.getUNDEF(VT);
// fold (srl x, 0) -> x
if (N1C && N1C->isNullValue())
@@ -6064,7 +6579,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
return (c1 + c2).uge(OpSizeInBits);
};
- if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
return DAG.getConstant(0, SDLoc(N), VT);
auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
@@ -6074,7 +6589,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
return (c1 + c2).ult(OpSizeInBits);
};
- if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
SDLoc DL(N);
EVT ShiftVT = N1.getValueType();
SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
@@ -6285,6 +6800,13 @@ SDValue DAGCombiner::visitCTLZ(SDNode *N) {
// fold (ctlz c1) -> c2
if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
+
+ // If the value is known never to be zero, switch to the undef version.
+ if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
+ if (DAG.isKnownNeverZero(N0))
+ return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
+ }
+
return SDValue();
}
@@ -6305,6 +6827,13 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) {
// fold (cttz c1) -> c2
if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
+
+ // If the value is known never to be zero, switch to the undef version.
+ if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
+ if (DAG.isKnownNeverZero(N0))
+ return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
+ }
+
return SDValue();
}
@@ -6328,7 +6857,7 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) {
return SDValue();
}
-/// \brief Generate Min/Max node
+/// Generate Min/Max node
static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
SDValue RHS, SDValue True, SDValue False,
ISD::CondCode CC, const TargetLowering &TLI,
@@ -6443,9 +6972,9 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
// in another basic block or it could require searching a complicated
// expression.
if (CondVT.isInteger() &&
- TLI.getBooleanContents(false, true) ==
+ TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
TargetLowering::ZeroOrOneBooleanContent &&
- TLI.getBooleanContents(false, false) ==
+ TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
TargetLowering::ZeroOrOneBooleanContent &&
C1->isNullValue() && C2->isOne()) {
SDValue NotCond =
@@ -6574,15 +7103,10 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
}
}
- // select (xor Cond, 1), X, Y -> select Cond, Y, X
if (VT0 == MVT::i1) {
- if (N0->getOpcode() == ISD::XOR) {
- if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) {
- SDValue Cond0 = N0->getOperand(0);
- if (C->isOne())
- return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N2, N1);
- }
- }
+ // select (not Cond), N1, N2 -> select Cond, N2, N1
+ if (isBitwiseNot(N0))
+ return DAG.getNode(ISD::SELECT, DL, VT, N0->getOperand(0), N2, N1);
}
// fold selects based on a setcc into other things, such as min/max/abs
@@ -6726,6 +7250,7 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
SDValue DataLo, DataHi;
std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
+ SDValue Scale = MSC->getScale();
SDValue BasePtr = MSC->getBasePtr();
SDValue IndexLo, IndexHi;
std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
@@ -6735,11 +7260,11 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
Alignment, MSC->getAAInfo(), MSC->getRanges());
- SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo };
+ SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale };
Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
DL, OpsLo, MMO);
- SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi};
+ SDValue OpsHi[] = { Chain, DataHi, MaskHi, BasePtr, IndexHi, Scale };
Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
DL, OpsHi, MMO);
@@ -6800,12 +7325,12 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
MST->isCompressingStore());
+ unsigned HiOffset = LoMemVT.getStoreSize();
- MMO = DAG.getMachineFunction().
- getMachineMemOperand(MST->getPointerInfo(),
- MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
- SecondHalfAlignment, MST->getAAInfo(),
- MST->getRanges());
+ MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MST->getPointerInfo().getWithOffset(HiOffset),
+ MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment,
+ MST->getAAInfo(), MST->getRanges());
Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
MST->isTruncatingStore(),
@@ -6859,6 +7384,7 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) {
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+ SDValue Scale = MGT->getScale();
SDValue BasePtr = MGT->getBasePtr();
SDValue Index = MGT->getIndex();
SDValue IndexLo, IndexHi;
@@ -6869,13 +7395,13 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) {
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MGT->getAAInfo(), MGT->getRanges());
- SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo };
+ SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo, Scale };
Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
- MMO);
+ MMO);
- SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi};
+ SDValue OpsHi[] = { Chain, Src0Hi, MaskHi, BasePtr, IndexHi, Scale };
Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
- MMO);
+ MMO);
AddToWorklist(Lo.getNode());
AddToWorklist(Hi.getNode());
@@ -6949,11 +7475,12 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
MLD->isExpandingLoad());
+ unsigned HiOffset = LoMemVT.getStoreSize();
- MMO = DAG.getMachineFunction().
- getMachineMemOperand(MLD->getPointerInfo(),
- MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
- SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
+ MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MLD->getPointerInfo().getWithOffset(HiOffset),
+ MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment,
+ MLD->getAAInfo(), MLD->getRanges());
Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
ISD::NON_EXTLOAD, MLD->isExpandingLoad());
@@ -7071,6 +7598,36 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
AddToWorklist(Add.getNode());
return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
}
+
+ // If this select has a condition (setcc) with narrower operands than the
+ // select, try to widen the compare to match the select width.
+ // TODO: This should be extended to handle any constant.
+ // TODO: This could be extended to handle non-loading patterns, but that
+ // requires thorough testing to avoid regressions.
+ if (isNullConstantOrNullSplatConstant(RHS)) {
+ EVT NarrowVT = LHS.getValueType();
+ EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
+ EVT SetCCVT = getSetCCResultType(LHS.getValueType());
+ unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
+ unsigned WideWidth = WideVT.getScalarSizeInBits();
+ bool IsSigned = isSignedIntSetCC(CC);
+ auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+ if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
+ SetCCWidth != 1 && SetCCWidth < WideWidth &&
+ TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
+ TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
+ // Both compare operands can be widened for free. The LHS can use an
+ // extended load, and the RHS is a constant:
+ // vselect (ext (setcc load(X), C)), N1, N2 -->
+ // vselect (setcc extload(X), C'), N1, N2
+ auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
+ SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
+ EVT WideSetCCVT = getSetCCResultType(WideVT);
+ SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
+ return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
+ }
+ }
}
if (SimplifySelectOps(N, N1, N2))
@@ -7142,22 +7699,33 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
}
SDValue DAGCombiner::visitSETCC(SDNode *N) {
- return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
- cast<CondCodeSDNode>(N->getOperand(2))->get(),
- SDLoc(N));
-}
+ // setcc is very commonly used as an argument to brcond. This pattern
+ // also lends itself to numerous combines and, as a result, it is desirable
+ // to keep the argument to a brcond as a setcc as much as possible.
+ bool PreferSetCC =
+ N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
-SDValue DAGCombiner::visitSETCCE(SDNode *N) {
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
- SDValue Carry = N->getOperand(2);
- SDValue Cond = N->getOperand(3);
+ SDValue Combined = SimplifySetCC(
+ N->getValueType(0), N->getOperand(0), N->getOperand(1),
+ cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
- // If Carry is false, fold to a regular SETCC.
- if (Carry.getOpcode() == ISD::CARRY_FALSE)
- return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
+ if (!Combined)
+ return SDValue();
- return SDValue();
+ // If we prefer to have a setcc, and we don't, we'll try our best to
+ // recreate one using rebuildSetCC.
+ if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
+ SDValue NewSetCC = rebuildSetCC(Combined);
+
+ // We don't have anything interesting to combine to.
+ if (NewSetCC.getNode() == N)
+ return SDValue();
+
+ if (NewSetCC)
+ return NewSetCC;
+ }
+
+ return Combined;
}
SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
@@ -7237,12 +7805,12 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
// transformation. Returns true if extension are possible and the above
// mentioned transformation is profitable.
-static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
+static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
unsigned ExtOpc,
SmallVectorImpl<SDNode *> &ExtendNodes,
const TargetLowering &TLI) {
bool HasCopyToRegUses = false;
- bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
+ bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
UE = N0.getNode()->use_end();
UI != UE; ++UI) {
@@ -7298,16 +7866,16 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
}
void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
- SDValue Trunc, SDValue ExtLoad,
- const SDLoc &DL, ISD::NodeType ExtType) {
+ SDValue OrigLoad, SDValue ExtLoad,
+ ISD::NodeType ExtType) {
// Extend SetCC uses if necessary.
- for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
- SDNode *SetCC = SetCCs[i];
+ SDLoc DL(ExtLoad);
+ for (SDNode *SetCC : SetCCs) {
SmallVector<SDValue, 4> Ops;
for (unsigned j = 0; j != 2; ++j) {
SDValue SOp = SetCC->getOperand(j);
- if (SOp == Trunc)
+ if (SOp == OrigLoad)
Ops.push_back(ExtLoad);
else
Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
@@ -7356,7 +7924,7 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
return SDValue();
SmallVector<SDNode *, 4> SetCCs;
- if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI))
+ if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
return SDValue();
ISD::LoadExtType ExtType =
@@ -7387,7 +7955,7 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
SDValue SplitLoad = DAG.getExtLoad(
- ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr,
+ ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
@@ -7410,12 +7978,82 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
// with a truncate of the concatenated sextloaded vectors.
SDValue Trunc =
DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
+ ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
CombineTo(N0.getNode(), Trunc, NewChain);
- ExtendSetCCUses(SetCCs, Trunc, NewValue, DL,
- (ISD::NodeType)N->getOpcode());
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
+// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
+// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
+SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
+ assert(N->getOpcode() == ISD::ZERO_EXTEND);
+ EVT VT = N->getValueType(0);
+
+ // and/or/xor
+ SDValue N0 = N->getOperand(0);
+ if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
+ N0.getOpcode() == ISD::XOR) ||
+ N0.getOperand(1).getOpcode() != ISD::Constant ||
+ (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
+ return SDValue();
+
+ // shl/shr
+ SDValue N1 = N0->getOperand(0);
+ if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
+ N1.getOperand(1).getOpcode() != ISD::Constant ||
+ (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
+ return SDValue();
+
+ // load
+ if (!isa<LoadSDNode>(N1.getOperand(0)))
+ return SDValue();
+ LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
+ EVT MemVT = Load->getMemoryVT();
+ if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
+ Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
+ return SDValue();
+
+ // If the shift op is SHL, the logic op must be AND, otherwise the result
+ // will be wrong.
+ if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
+ return SDValue();
+
+ if (!N0.hasOneUse() || !N1.hasOneUse())
+ return SDValue();
+
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
+ ISD::ZERO_EXTEND, SetCCs, TLI))
+ return SDValue();
+
+ // Actually do the transformation.
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
+ Load->getChain(), Load->getBasePtr(),
+ Load->getMemoryVT(), Load->getMemOperand());
+
+ SDLoc DL1(N1);
+ SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
+ N1.getOperand(1));
+
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.zext(VT.getSizeInBits());
+ SDLoc DL0(N0);
+ SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
+ DAG.getConstant(Mask, DL0, VT));
+
+ ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
+ CombineTo(N, And);
+ if (SDValue(Load, 0).hasOneUse()) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
+ } else {
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
+ Load->getValueType(0), ExtLoad);
+ CombineTo(Load, Trunc, ExtLoad.getValue(1));
+ }
+ return SDValue(N,0); // Return N so it doesn't get rechecked!
+}
+
/// If we're narrowing or widening the result of a vector select and the final
/// size is the same size as a setcc (compare) feeding the select, then try to
/// apply the cast operation to the select's operands because matching vector
@@ -7461,6 +8099,106 @@ SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
}
+// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
+// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
+static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
+ const TargetLowering &TLI, EVT VT,
+ bool LegalOperations, SDNode *N,
+ SDValue N0, ISD::LoadExtType ExtLoadType) {
+ SDNode *N0Node = N0.getNode();
+ bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
+ : ISD::isZEXTLoad(N0Node);
+ if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
+ !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
+ return {};
+
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ if ((LegalOperations || LN0->isVolatile()) &&
+ !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
+ return {};
+
+ SDValue ExtLoad =
+ DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
+ LN0->getBasePtr(), MemVT, LN0->getMemOperand());
+ Combiner.CombineTo(N, ExtLoad);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+}
+
+// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
+// Only generate vector extloads when 1) they're legal, and 2) they are
+// deemed desirable by the target.
+static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
+ const TargetLowering &TLI, EVT VT,
+ bool LegalOperations, SDNode *N, SDValue N0,
+ ISD::LoadExtType ExtLoadType,
+ ISD::NodeType ExtOpc) {
+ if (!ISD::isNON_EXTLoad(N0.getNode()) ||
+ !ISD::isUNINDEXEDLoad(N0.getNode()) ||
+ ((LegalOperations || VT.isVector() ||
+ cast<LoadSDNode>(N0)->isVolatile()) &&
+ !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
+ return {};
+
+ bool DoXform = true;
+ SmallVector<SDNode *, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
+ if (VT.isVector())
+ DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
+ if (!DoXform)
+ return {};
+
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
+ LN0->getBasePtr(), N0.getValueType(),
+ LN0->getMemOperand());
+ Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
+ // If the load value is used only by N, replace it via CombineTo N.
+ bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
+ Combiner.CombineTo(N, ExtLoad);
+ if (NoReplaceTrunc) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+ } else {
+ SDValue Trunc =
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
+ Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+}
+
+static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
+ bool LegalOperations) {
+ assert((N->getOpcode() == ISD::SIGN_EXTEND ||
+ N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
+
+ SDValue SetCC = N->getOperand(0);
+ if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
+ !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
+ return SDValue();
+
+ SDValue X = SetCC.getOperand(0);
+ SDValue Ones = SetCC.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
+ EVT VT = N->getValueType(0);
+ EVT XVT = X.getValueType();
+ // setge X, C is canonicalized to setgt, so we do not need to match that
+ // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
+ // not require the 'not' op.
+ if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
+ // Invert and smear/shift the sign bit:
+ // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
+ // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
+ SDLoc DL(N);
+ SDValue NotX = DAG.getNOT(DL, X, VT);
+ SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
+ auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
+ return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
+ }
+ return SDValue();
+}
+
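
The identities used by foldExtendedSignBitTest can be spot-checked at the source level.
This standalone sketch is not part of the patch, the names are made up, and it assumes
an arithmetic right shift for signed values, as on all mainstream targets.

    #include <cassert>
    #include <cstdint>

    // For N = 32:
    //   sext(i1 (x > -1)) == sra(~x, 31)  (all-ones when x >= 0, else 0)
    //   zext(i1 (x > -1)) == srl(~x, 31)  (1 when x >= 0, else 0)
    static void check(int32_t x) {
      int32_t sext_form = (x > -1) ? -1 : 0;
      int32_t sra_form = ~x >> 31;  // arithmetic shift assumed
      uint32_t zext_form = (x > -1) ? 1u : 0u;
      uint32_t srl_form = static_cast<uint32_t>(~x) >> 31;
      assert(sext_form == sra_form && zext_form == srl_form);
    }

    int main() {
      for (int32_t x : {INT32_MIN, -2, -1, 0, 1, INT32_MAX})
        check(x);
    }
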
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -7525,62 +8263,21 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
}
}
- // fold (sext (load x)) -> (sext (truncate (sextload x)))
- // Only generate vector extloads when 1) they're legal, and 2) they are
- // deemed desirable by the target.
- if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
- ((!LegalOperations && !VT.isVector() &&
- !cast<LoadSDNode>(N0)->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) {
- bool DoXform = true;
- SmallVector<SDNode*, 4> SetCCs;
- if (!N0.hasOneUse())
- DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
- if (VT.isVector())
- DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
- if (DoXform) {
- LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
- LN0->getBasePtr(), N0.getValueType(),
- LN0->getMemOperand());
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
- N0.getValueType(), ExtLoad);
- ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
- // If the load value is used only by N, replace it via CombineTo N.
- bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
- CombineTo(N, ExtLoad);
- if (NoReplaceTrunc)
- DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
- else
- CombineTo(LN0, Trunc, ExtLoad.getValue(1));
- return SDValue(N, 0);
- }
- }
+ // Try to simplify (sext (load x)).
+ if (SDValue foldedExt =
+ tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
+ ISD::SEXTLOAD, ISD::SIGN_EXTEND))
+ return foldedExt;
// fold (sext (load x)) to multiple smaller sextloads.
// Only on illegal but splittable vectors.
if (SDValue ExtLoad = CombineExtLoad(N))
return ExtLoad;
- // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
- // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
- if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
- ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
- LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- EVT MemVT = LN0->getMemoryVT();
- if ((!LegalOperations && !LN0->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
- LN0->getBasePtr(), MemVT,
- LN0->getMemOperand());
- CombineTo(N, ExtLoad);
- CombineTo(N0.getNode(),
- DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
- N0.getValueType(), ExtLoad),
- ExtLoad.getValue(1));
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
- }
- }
+ // Try to simplify (sext (sextload x)).
+ if (SDValue foldedExt = tryToFoldExtOfExtload(
+ DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
+ return foldedExt;
// fold (sext (and/or/xor (load x), cst)) ->
// (and/or/xor (sextload x), (sext cst))
@@ -7588,30 +8285,26 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
N0.getOpcode() == ISD::XOR) &&
isa<LoadSDNode>(N0.getOperand(0)) &&
N0.getOperand(1).getOpcode() == ISD::Constant &&
- TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) &&
(!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
- LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
- if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) {
- bool DoXform = true;
+ LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
+ EVT MemVT = LN00->getMemoryVT();
+ if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
+ LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
SmallVector<SDNode*, 4> SetCCs;
- if (!N0.hasOneUse())
- DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
- SetCCs, TLI);
+ bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
+ ISD::SIGN_EXTEND, SetCCs, TLI);
if (DoXform) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
- LN0->getChain(), LN0->getBasePtr(),
- LN0->getMemoryVT(),
- LN0->getMemOperand());
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
+ LN00->getChain(), LN00->getBasePtr(),
+ LN00->getMemoryVT(),
+ LN00->getMemOperand());
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.sext(VT.getSizeInBits());
SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
ExtLoad, DAG.getConstant(Mask, DL, VT));
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
- SDLoc(N0.getOperand(0)),
- N0.getOperand(0).getValueType(), ExtLoad);
- ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
+ ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
bool NoReplaceTruncAnd = !N0.hasOneUse();
- bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
+ bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
CombineTo(N, And);
// If N0 has multiple uses, change other uses as well.
if (NoReplaceTruncAnd) {
@@ -7619,15 +8312,21 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
CombineTo(N0.getNode(), TruncAnd);
}
- if (NoReplaceTrunc)
- DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
- else
- CombineTo(LN0, Trunc, ExtLoad.getValue(1));
+ if (NoReplaceTrunc) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
+ } else {
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
+ LN00->getValueType(0), ExtLoad);
+ CombineTo(LN00, Trunc, ExtLoad.getValue(1));
+ }
return SDValue(N,0); // Return N so it doesn't get rechecked!
}
}
}
+ if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
+ return V;
+
if (N0.getOpcode() == ISD::SETCC) {
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
@@ -7674,8 +8373,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// If the type of the setcc is larger (say, i8) then the value of the high
// bit depends on getBooleanContents(), so ask TLI for a real "true" value
// of the appropriate width.
- SDValue ExtTrueVal = (SetCCWidth == 1) ? DAG.getAllOnesConstant(DL, VT)
- : TLI.getConstTrueVal(DAG, VT, DL);
+ SDValue ExtTrueVal = (SetCCWidth == 1)
+ ? DAG.getAllOnesConstant(DL, VT)
+ : DAG.getBoolConstant(true, DL, VT, N00VT);
SDValue Zero = DAG.getConstant(0, DL, VT);
if (SDValue SCC =
SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
@@ -7792,13 +8492,16 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// Try to mask before the extension to avoid having to generate a larger mask,
// possibly over several sub-vectors.
- if (SrcVT.bitsLT(VT)) {
+ if (SrcVT.bitsLT(VT) && VT.isVector()) {
if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
SDValue Op = N0.getOperand(0);
Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
AddToWorklist(Op.getNode());
- return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
+ SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
+ // Transfer the debug info; the new node is equivalent to N0.
+ DAG.transferDbgValues(N0, ZExtOrTrunc);
+ return ZExtOrTrunc;
}
}
@@ -7830,39 +8533,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
X, DAG.getConstant(Mask, DL, VT));
}
- // fold (zext (load x)) -> (zext (truncate (zextload x)))
- // Only generate vector extloads when 1) they're legal, and 2) they are
- // deemed desirable by the target.
- if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
- ((!LegalOperations && !VT.isVector() &&
- !cast<LoadSDNode>(N0)->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) {
- bool DoXform = true;
- SmallVector<SDNode*, 4> SetCCs;
- if (!N0.hasOneUse())
- DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
- if (VT.isVector())
- DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
- if (DoXform) {
- LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
- LN0->getChain(),
- LN0->getBasePtr(), N0.getValueType(),
- LN0->getMemOperand());
-
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
- N0.getValueType(), ExtLoad);
- ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ZERO_EXTEND);
- // If the load value is used only by N, replace it via CombineTo N.
- bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
- CombineTo(N, ExtLoad);
- if (NoReplaceTrunc)
- DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
- else
- CombineTo(LN0, Trunc, ExtLoad.getValue(1));
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
- }
- }
+ // Try to simplify (zext (load x)).
+ if (SDValue foldedExt =
+ tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
+ ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
+ return foldedExt;
// fold (zext (load x)) to multiple smaller zextloads.
// Only on illegal but splittable vectors.
@@ -7877,10 +8552,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
N0.getOpcode() == ISD::XOR) &&
isa<LoadSDNode>(N0.getOperand(0)) &&
N0.getOperand(1).getOpcode() == ISD::Constant &&
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) &&
(!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
- LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
- if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
+ LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
+ EVT MemVT = LN00->getMemoryVT();
+ if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
+ LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
bool DoXform = true;
SmallVector<SDNode*, 4> SetCCs;
if (!N0.hasOneUse()) {
@@ -7888,29 +8564,26 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
EVT LoadResultTy = AndC->getValueType(0);
EVT ExtVT;
- if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT))
+ if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
DoXform = false;
}
- if (DoXform)
- DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0),
- ISD::ZERO_EXTEND, SetCCs, TLI);
}
+ if (DoXform)
+ DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
+ ISD::ZERO_EXTEND, SetCCs, TLI);
if (DoXform) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
- LN0->getChain(), LN0->getBasePtr(),
- LN0->getMemoryVT(),
- LN0->getMemOperand());
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
+ LN00->getChain(), LN00->getBasePtr(),
+ LN00->getMemoryVT(),
+ LN00->getMemOperand());
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.zext(VT.getSizeInBits());
SDLoc DL(N);
SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
ExtLoad, DAG.getConstant(Mask, DL, VT));
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
- SDLoc(N0.getOperand(0)),
- N0.getOperand(0).getValueType(), ExtLoad);
- ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::ZERO_EXTEND);
+ ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
bool NoReplaceTruncAnd = !N0.hasOneUse();
- bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
+ bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
CombineTo(N, And);
// If N0 has multiple uses, change other uses as well.
if (NoReplaceTruncAnd) {
@@ -7918,35 +8591,30 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
CombineTo(N0.getNode(), TruncAnd);
}
- if (NoReplaceTrunc)
- DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
- else
- CombineTo(LN0, Trunc, ExtLoad.getValue(1));
+ if (NoReplaceTrunc) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
+ } else {
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
+ LN00->getValueType(0), ExtLoad);
+ CombineTo(LN00, Trunc, ExtLoad.getValue(1));
+ }
return SDValue(N,0); // Return N so it doesn't get rechecked!
}
}
}
- // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
- // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
- if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
- ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
- LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- EVT MemVT = LN0->getMemoryVT();
- if ((!LegalOperations && !LN0->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
- LN0->getChain(),
- LN0->getBasePtr(), MemVT,
- LN0->getMemOperand());
- CombineTo(N, ExtLoad);
- CombineTo(N0.getNode(),
- DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(),
- ExtLoad),
- ExtLoad.getValue(1));
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
- }
- }
+ // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
+ // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
+ if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
+ return ZExtLoad;
+
+ // Try to simplify (zext (zextload x)).
+ if (SDValue foldedExt = tryToFoldExtOfExtload(
+ DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
+ return foldedExt;
+
+ if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
+ return V;
if (N0.getOpcode() == ISD::SETCC) {
// Only do this before legalize for now.
@@ -8084,24 +8752,25 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
bool DoXform = true;
SmallVector<SDNode*, 4> SetCCs;
if (!N0.hasOneUse())
- DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
+ DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
+ TLI);
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
LN0->getChain(),
LN0->getBasePtr(), N0.getValueType(),
LN0->getMemOperand());
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
- N0.getValueType(), ExtLoad);
- ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
- ISD::ANY_EXTEND);
+ ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
// If the load value is used only by N, replace it via CombineTo N.
bool NoReplaceTrunc = N0.hasOneUse();
CombineTo(N, ExtLoad);
- if (NoReplaceTrunc)
+ if (NoReplaceTrunc) {
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
- else
+ } else {
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
+ N0.getValueType(), ExtLoad);
CombineTo(LN0, Trunc, ExtLoad.getValue(1));
+ }
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -8109,9 +8778,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
// fold (aext (zextload x)) -> (aext (truncate (zextload x)))
// fold (aext (sextload x)) -> (aext (truncate (sextload x)))
// fold (aext ( extload x)) -> (aext (truncate (extload x)))
- if (N0.getOpcode() == ISD::LOAD &&
- !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
- N0.hasOneUse()) {
+ if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
ISD::LoadExtType ExtType = LN0->getExtensionType();
EVT MemVT = LN0->getMemoryVT();
@@ -8120,10 +8788,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
VT, LN0->getChain(), LN0->getBasePtr(),
MemVT, LN0->getMemOperand());
CombineTo(N, ExtLoad);
- CombineTo(N0.getNode(),
- DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
- N0.getValueType(), ExtLoad),
- ExtLoad.getValue(1));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -8263,8 +8928,9 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
unsigned ShAmt = 0;
if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
- if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
- ShAmt = N01->getZExtValue();
+ SDValue SRL = N0;
+ if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
+ ShAmt = ConstShift->getZExtValue();
unsigned EVTBits = ExtVT.getSizeInBits();
// Is the shift amount a multiple of size of VT?
if ((ShAmt & (EVTBits-1)) == 0) {
@@ -8277,17 +8943,36 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
// At this point, we must have a load or else we can't do the transform.
if (!isa<LoadSDNode>(N0)) return SDValue();
+ auto *LN0 = cast<LoadSDNode>(N0);
+
// Because a SRL must be assumed to *need* to zero-extend the high bits
// (as opposed to anyext the high bits), we can't combine the zextload
// lowering of SRL and an sextload.
- if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
+ if (LN0->getExtensionType() == ISD::SEXTLOAD)
return SDValue();
// If the shift amount is larger than the input type then we're not
// accessing any of the loaded bytes. If the load was a zextload/extload
// then the result of the shift+trunc is zero/undef (handled elsewhere).
- if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
+ if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
return SDValue();
+
+ // If the SRL is only used by a masking AND, we may be able to adjust
+ // the ExtVT to make the AND redundant.
+ SDNode *Mask = *(SRL->use_begin());
+ if (Mask->getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(Mask->getOperand(1))) {
+ const APInt &ShiftMask =
+ cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
+ if (ShiftMask.isMask()) {
+ EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
+ ShiftMask.countTrailingOnes());
+ // If the mask is smaller, recompute the type.
+ if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
+ TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
+ ExtVT = MaskedVT;
+ }
+ }
}
}
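
A rough source-level example (not from the patch; the name is hypothetical) of the
narrowed-load opportunity the new ReduceLoadWidth code recognizes: when the shift is
consumed only by a masking AND, the mask can shrink the extension type so that a single
narrow zero-extending load suffices.

    #include <cstdint>

    uint32_t third_byte(const uint32_t *p) {
      // Can become an i8 zextload of one byte of *p; the byte offset the
      // combiner picks depends on the target's endianness.
      return (*p >> 16) & 0xFF;
    }
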
@@ -8307,7 +8992,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
return SDValue();
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- if (!isLegalNarrowLoad(LN0, ExtType, ExtVT, ShAmt))
+ if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
return SDValue();
// For big endian targets, we need to adjust the offset to the pointer to
@@ -8403,7 +9088,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
}
- // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_in_reg x)
+ // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
@@ -8777,6 +9462,22 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
}
+ // fold (truncate (extract_subvector(ext x))) ->
+ // (extract_subvector x)
+ // TODO: This can be generalized to cover cases where the truncate and extract
+ // do not fully cancel each other out.
+ if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::SIGN_EXTEND ||
+ N00.getOpcode() == ISD::ZERO_EXTEND ||
+ N00.getOpcode() == ISD::ANY_EXTEND) {
+ if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
+ VT.getVectorElementType())
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
+ N00.getOperand(0), N0.getOperand(1));
+ }
+ }
+
if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
return NewVSel;
@@ -8897,17 +9598,17 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
}
// If the input is a constant, let getNode fold it.
- if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
- // If we can't allow illegal operations, we need to check that this is just
- // a fp -> int or int -> conversion and that the resulting operation will
- // be legal.
- if (!LegalOperations ||
- (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
- TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
- (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
- TLI.isOperationLegal(ISD::Constant, VT)))
- return DAG.getBitcast(VT, N0);
- }
+ // We always need to check that this is just an fp -> int or int -> fp
+ // conversion; otherwise we will get back N, which will confuse the caller
+ // into thinking we used CombineTo. That can block target combines from
+ // running. If we can't allow illegal operations, we need to ensure the
+ // resulting operation will be legal.
+ // TODO: Maybe we should check that the return value isn't N explicitly?
+ if ((isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::ConstantFP, VT))) ||
+ (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::Constant, VT))))
+ return DAG.getBitcast(VT, N0);
// (conv (conv x, t1), t2) -> (conv x, t2)
if (N0.getOpcode() == ISD::BITCAST)
@@ -9253,7 +9954,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
static bool isContractable(SDNode *N) {
SDNodeFlags F = N->getFlags();
- return F.hasAllowContract() || F.hasUnsafeAlgebra();
+ return F.hasAllowContract() || F.hasAllowReassociation();
}
/// Try to perform FMA combining on a given FADD node.
@@ -9277,8 +9978,10 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
if (!HasFMAD && !HasFMA)
return SDValue();
+ SDNodeFlags Flags = N->getFlags();
+ bool CanFuse = Options.UnsafeFPMath || isContractable(N);
bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
- Options.UnsafeFPMath || HasFMAD);
+ CanFuse || HasFMAD);
// If the addition is not contractable, do not combine.
if (!AllowFusionGlobally && !isContractable(N))
return SDValue();
@@ -9308,14 +10011,14 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// fold (fadd (fmul x, y), z) -> (fma x, y, z)
if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
- N0.getOperand(0), N0.getOperand(1), N1);
+ N0.getOperand(0), N0.getOperand(1), N1, Flags);
}
// fold (fadd x, (fmul y, z)) -> (fma y, z, x)
// Note: Commutes FADD operands.
if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
- N1.getOperand(0), N1.getOperand(1), N0);
+ N1.getOperand(0), N1.getOperand(1), N0, Flags);
}
// Look through FP_EXTEND nodes to do more combining.
@@ -9329,7 +10032,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N00.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(1)), N1);
+ N00.getOperand(1)), N1, Flags);
}
}
@@ -9343,16 +10046,14 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N10.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N10.getOperand(1)), N0);
+ N10.getOperand(1)), N0, Flags);
}
}
// More folding opportunities when target permits.
if (Aggressive) {
// fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
- // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
- // are currently only supported on binary nodes.
- if (Options.UnsafeFPMath &&
+ if (CanFuse &&
N0.getOpcode() == PreferredFusedOpcode &&
N0.getOperand(2).getOpcode() == ISD::FMUL &&
N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
@@ -9361,13 +10062,11 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(2).getOperand(0),
N0.getOperand(2).getOperand(1),
- N1));
+ N1, Flags), Flags);
}
// fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
- // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
- // are currently only supported on binary nodes.
- if (Options.UnsafeFPMath &&
+ if (CanFuse &&
N1->getOpcode() == PreferredFusedOpcode &&
N1.getOperand(2).getOpcode() == ISD::FMUL &&
N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
@@ -9376,19 +10075,20 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
DAG.getNode(PreferredFusedOpcode, SL, VT,
N1.getOperand(2).getOperand(0),
N1.getOperand(2).getOperand(1),
- N0));
+ N0, Flags), Flags);
}
// fold (fadd (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y, (fma (fpext u), (fpext v), z))
auto FoldFAddFMAFPExtFMul = [&] (
- SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
+ SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
+ SDNodeFlags Flags) {
return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
- Z));
+ Z, Flags), Flags);
};
if (N0.getOpcode() == PreferredFusedOpcode) {
SDValue N02 = N0.getOperand(2);
@@ -9398,7 +10098,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
N020.getOperand(0), N020.getOperand(1),
- N1);
+ N1, Flags);
}
}
}
@@ -9409,14 +10109,15 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// operation into two double-precision operations, which might not be
// interesting for all targets, especially GPUs.
auto FoldFAddFPExtFMAFMul = [&] (
- SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
+ SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
+ SDNodeFlags Flags) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
- Z));
+ Z, Flags), Flags);
};
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
@@ -9426,7 +10127,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
N002.getOperand(0), N002.getOperand(1),
- N1);
+ N1, Flags);
}
}
}
@@ -9441,7 +10142,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
N120.getOperand(0), N120.getOperand(1),
- N0);
+ N0, Flags);
}
}
}
@@ -9459,7 +10160,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
N102.getOperand(0), N102.getOperand(1),
- N0);
+ N0, Flags);
}
}
}
@@ -9488,8 +10189,11 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (!HasFMAD && !HasFMA)
return SDValue();
+ const SDNodeFlags Flags = N->getFlags();
+ bool CanFuse = Options.UnsafeFPMath || isContractable(N);
bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
- Options.UnsafeFPMath || HasFMAD);
+ CanFuse || HasFMAD);
+
// If the subtraction is not contractable, do not combine.
if (!AllowFusionGlobally && !isContractable(N))
return SDValue();
@@ -9514,16 +10218,17 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(ISD::FNEG, SL, VT, N1));
+ DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
}
// fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
// Note: Commutes FSUB operands.
- if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse()))
+ if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT,
N1.getOperand(0)),
- N1.getOperand(1), N0);
+ N1.getOperand(1), N0, Flags);
+ }
// fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
@@ -9532,7 +10237,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N01 = N0.getOperand(0).getOperand(1);
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
- DAG.getNode(ISD::FNEG, SL, VT, N1));
+ DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
}
// Look through FP_EXTEND nodes to do more combining.
@@ -9548,7 +10253,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N00.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N00.getOperand(1)),
- DAG.getNode(ISD::FNEG, SL, VT, N1));
+ DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
}
}
@@ -9565,7 +10270,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N10.getOperand(0))),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N10.getOperand(1)),
- N0);
+ N0, Flags);
}
}
@@ -9587,7 +10292,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N000.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N000.getOperand(1)),
- N1));
+ N1, Flags));
}
}
}
@@ -9610,7 +10315,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N000.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N000.getOperand(1)),
- N1));
+ N1, Flags));
}
}
}
@@ -9619,9 +10324,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (Aggressive) {
// fold (fsub (fma x, y, (fmul u, v)), z)
// -> (fma x, y (fma u, v, (fneg z)))
- // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
- // are currently only supported on binary nodes.
- if (Options.UnsafeFPMath && N0.getOpcode() == PreferredFusedOpcode &&
+ if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
N0.getOperand(2)->hasOneUse()) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -9630,14 +10333,12 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N0.getOperand(2).getOperand(0),
N0.getOperand(2).getOperand(1),
DAG.getNode(ISD::FNEG, SL, VT,
- N1)));
+ N1), Flags), Flags);
}
// fold (fsub x, (fma y, z, (fmul u, v)))
// -> (fma (fneg y), z, (fma (fneg u), v, x))
- // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
- // are currently only supported on binary nodes.
- if (Options.UnsafeFPMath && N1.getOpcode() == PreferredFusedOpcode &&
+ if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
isContractableFMUL(N1.getOperand(2))) {
SDValue N20 = N1.getOperand(2).getOperand(0);
SDValue N21 = N1.getOperand(2).getOperand(1);
@@ -9647,8 +10348,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N1.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, N20),
-
- N21, N0));
+ N21, N0, Flags), Flags);
}
@@ -9668,7 +10368,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N020.getOperand(1)),
DAG.getNode(ISD::FNEG, SL, VT,
- N1)));
+ N1), Flags), Flags);
}
}
}
@@ -9696,7 +10396,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N002.getOperand(1)),
DAG.getNode(ISD::FNEG, SL, VT,
- N1)));
+ N1), Flags), Flags);
}
}
}
@@ -9719,7 +10419,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
VT, N1200)),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N1201),
- N0));
+ N0, Flags), Flags);
}
}
@@ -9750,7 +10450,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
VT, N1020)),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N1021),
- N0));
+ N0, Flags), Flags);
}
}
}
@@ -9766,6 +10466,7 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
SDLoc SL(N);
+ const SDNodeFlags Flags = N->getFlags();
assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
@@ -9797,52 +10498,54 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
// fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
// fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
- auto FuseFADD = [&](SDValue X, SDValue Y) {
+ auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
if (XC1 && XC1->isExactlyValue(+1.0))
- return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ Y, Flags);
if (XC1 && XC1->isExactlyValue(-1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
- DAG.getNode(ISD::FNEG, SL, VT, Y));
+ DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
}
return SDValue();
};
- if (SDValue FMA = FuseFADD(N0, N1))
+ if (SDValue FMA = FuseFADD(N0, N1, Flags))
return FMA;
- if (SDValue FMA = FuseFADD(N1, N0))
+ if (SDValue FMA = FuseFADD(N1, N0, Flags))
return FMA;
// fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
// fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
// fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
// fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
- auto FuseFSUB = [&](SDValue X, SDValue Y) {
+ auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
if (XC0 && XC0->isExactlyValue(+1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
- Y);
+ Y, Flags);
if (XC0 && XC0->isExactlyValue(-1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
- DAG.getNode(ISD::FNEG, SL, VT, Y));
+ DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
if (XC1 && XC1->isExactlyValue(+1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
- DAG.getNode(ISD::FNEG, SL, VT, Y));
+ DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
if (XC1 && XC1->isExactlyValue(-1.0))
- return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ Y, Flags);
}
return SDValue();
};
- if (SDValue FMA = FuseFSUB(N0, N1))
+ if (SDValue FMA = FuseFSUB(N0, N1, Flags))
return FMA;
- if (SDValue FMA = FuseFSUB(N1, N0))
+ if (SDValue FMA = FuseFSUB(N1, N0, Flags))
return FMA;
return SDValue();
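
Not part of the patch: the algebra behind FuseFADD and FuseFSUB, shown with hypothetical
names. For example, (x + 1.0) * y == x*y + y == fma(x, y, y), and
(x - 1.0) * y == fma(x, y, -y). The rewrite changes rounding behavior, which is why the
surrounding code only performs it under the contraction/reassociation conditions checked
above.

    #include <cmath>

    float plain_form(float x, float y) { return (x + 1.0f) * y; }
    float fused_form(float x, float y) { return std::fma(x, y, y); }
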
@@ -9904,35 +10607,42 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags);
}
- // FIXME: Auto-upgrade the target/function-level option.
- if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) {
- // fold (fadd A, 0) -> A
- if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
- if (N1C->isZero())
- return N0;
+ ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1);
+ if (N1C && N1C->isZero()) {
+ if (N1C->isNegative() || Options.UnsafeFPMath ||
+ Flags.hasNoSignedZeros()) {
+ // fold (fadd A, 0) -> A
+ return N0;
+ }
}
- // If 'unsafe math' is enabled, fold lots of things.
- if (Options.UnsafeFPMath) {
- // No FP constant should be created after legalization as Instruction
- // Selection pass has a hard time dealing with FP constants.
- bool AllowNewConst = (Level < AfterLegalizeDAG);
-
- // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
- if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
- isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)))
- return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
- DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1,
- Flags),
- Flags);
+ // No FP constant should be created after legalization as the Instruction
+ // Selection pass has a hard time dealing with FP constants.
+ bool AllowNewConst = (Level < AfterLegalizeDAG);
+ // If 'unsafe math' or nnan is enabled, fold lots of things.
+ if ((Options.UnsafeFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
// If allowed, fold (fadd (fneg x), x) -> 0.0
- if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
+ if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
return DAG.getConstantFP(0.0, DL, VT);
// If allowed, fold (fadd x, (fneg x)) -> 0.0
- if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
+ if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
return DAG.getConstantFP(0.0, DL, VT);
+ }
+
+ // If 'unsafe math' is enabled, or both reassoc and nsz are set, fold lots
+ // of things.
+ // TODO: break out the portions of the transformations below for which
+ // Unsafe is considered and which do not require both nsz and reassoc.
+ if ((Options.UnsafeFPMath ||
+ (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
+ AllowNewConst) {
+ // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
+ if (N1CFP && N0.getOpcode() == ISD::FADD &&
+ isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
+ SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
+ return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
+ }
// We can fold chains of FADD's of the same value into multiplications.
// This transform is not safe in general because we are reducing the number
@@ -9980,7 +10690,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
}
}
- if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
+ if (N0.getOpcode() == ISD::FADD) {
bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
// (fadd (fadd x, x), x) -> (fmul x, 3.0)
if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
@@ -9990,7 +10700,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
}
}
- if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
+ if (N1.getOpcode() == ISD::FADD) {
bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
// (fadd x, (fadd x, x)) -> (fmul x, 3.0)
if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
@@ -10001,8 +10711,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
}
// (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
- if (AllowNewConst &&
- N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
+ if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0)) {
@@ -10042,15 +10751,23 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
- // fold (fsub A, (fneg B)) -> (fadd A, B)
- if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
- return DAG.getNode(ISD::FADD, DL, VT, N0,
- GetNegatedExpression(N1, DAG, LegalOperations), Flags);
+ // (fsub A, 0) -> A
+ if (N1CFP && N1CFP->isZero()) {
+ if (!N1CFP->isNegative() || Options.UnsafeFPMath ||
+ Flags.hasNoSignedZeros()) {
+ return N0;
+ }
+ }
- // FIXME: Auto-upgrade the target/function-level option.
- if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) {
- // (fsub 0, B) -> -B
- if (N0CFP && N0CFP->isZero()) {
+ if (N0 == N1) {
+ // (fsub x, x) -> 0.0
+ if (Options.UnsafeFPMath || Flags.hasNoNaNs())
+ return DAG.getConstantFP(0.0f, DL, VT);
+ }
+
+ // (fsub 0, B) -> -B
+ if (N0CFP && N0CFP->isZero()) {
+ if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) {
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
return GetNegatedExpression(N1, DAG, LegalOperations);
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
@@ -10058,16 +10775,13 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
}
+ // fold (fsub A, (fneg B)) -> (fadd A, B)
+ if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
+ return DAG.getNode(ISD::FADD, DL, VT, N0,
+ GetNegatedExpression(N1, DAG, LegalOperations), Flags);
+
// If 'unsafe math' is enabled, fold lots of things.
if (Options.UnsafeFPMath) {
- // (fsub A, 0) -> A
- if (N1CFP && N1CFP->isZero())
- return N0;
-
- // (fsub x, x) -> 0.0
- if (N0 == N1)
- return DAG.getConstantFP(0.0f, DL, VT);
-
// (fsub x, (fadd x, y)) -> (fneg y)
// (fsub x, (fadd y, x)) -> (fneg y)
if (N1.getOpcode() == ISD::FADD) {
@@ -10124,12 +10838,15 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
- if (Options.UnsafeFPMath) {
+ if (Options.UnsafeFPMath ||
+ (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
// fold (fmul A, 0) -> 0
if (N1CFP && N1CFP->isZero())
return N1;
+ }
- // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
+ if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
+ // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
if (N0.getOpcode() == ISD::FMUL) {
// Fold scalars or any vector constants (not just splats).
// This fold is done in general by InstCombine, but extra fmul insts
@@ -10153,13 +10870,10 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
}
}
- // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c))
- // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
- // during an early run of DAGCombiner can prevent folding with fmuls
- // inserted during lowering.
- if (N0.getOpcode() == ISD::FADD &&
- (N0.getOperand(0) == N0.getOperand(1)) &&
- N0.hasOneUse()) {
+ // Match a special-case: we convert X * 2.0 into fadd.
+ // fmul (fadd X, X), C -> fmul X, 2.0 * C
+ if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
+ N0.getOperand(0) == N0.getOperand(1)) {
const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
@@ -10253,6 +10967,10 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
+ // FMA nodes have flags that propagate to the created nodes.
+ const SDNodeFlags Flags = N->getFlags();
+ bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);
+
// Constant fold FMA.
if (isa<ConstantFPSDNode>(N0) &&
isa<ConstantFPSDNode>(N1) &&
@@ -10260,7 +10978,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
}
- if (Options.UnsafeFPMath) {
+ if (UnsafeFPMath) {
if (N0CFP && N0CFP->isZero())
return N2;
if (N1CFP && N1CFP->isZero())
@@ -10277,12 +10995,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
!isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
- // TODO: FMA nodes should have flags that propagate to the created nodes.
- // For now, create a Flags object for use with all unsafe math transforms.
- SDNodeFlags Flags;
- Flags.setUnsafeAlgebra(true);
-
- if (Options.UnsafeFPMath) {
+ if (UnsafeFPMath) {
// (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
isConstantFPBuildVectorOrConstantFP(N1) &&
@@ -10328,7 +11041,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
}
}
- if (Options.UnsafeFPMath) {
+ if (UnsafeFPMath) {
// (fma x, c, x) -> (fmul x, (c+1))
if (N1CFP && N0 == N2) {
return DAG.getNode(ISD::FMUL, DL, VT, N0,
@@ -10435,7 +11148,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
- if (Options.UnsafeFPMath) {
+ if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
// fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
if (N1CFP) {
// Compute the reciprocal 1.0 / c2.
@@ -10544,17 +11257,16 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
}
SDValue DAGCombiner::visitFSQRT(SDNode *N) {
- if (!DAG.getTarget().Options.UnsafeFPMath)
+ SDNodeFlags Flags = N->getFlags();
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
+ !Flags.hasApproximateFuncs())
return SDValue();
SDValue N0 = N->getOperand(0);
if (TLI.isFsqrtCheap(N0, DAG))
return SDValue();
- // TODO: FSQRT nodes should have flags that propagate to the created nodes.
- // For now, create a Flags object for use with all unsafe math transforms.
- SDNodeFlags Flags;
- Flags.setUnsafeAlgebra(true);
+ // FSQRT nodes have flags that propagate to the created nodes.
return buildSqrtEstimate(N0, Flags);
}
@@ -10622,6 +11334,41 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
return SDValue();
}
+static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ // This optimization is guarded by a function attribute because it may produce
+ // unexpected results. That is, programs may be relying on the platform-specific
+ // undefined behavior when the float-to-int conversion overflows.
+ const Function &F = DAG.getMachineFunction().getFunction();
+ Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
+ if (StrictOverflow.getValueAsString().equals("false"))
+ return SDValue();
+
+ // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
+ // replacing casts with a libcall. We also must be allowed to ignore -0.0
+ // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
+ // conversions would return +0.0.
+ // FIXME: We should be able to use node-level FMF here.
+ // TODO: If strict math, should we use FABS (+ range check for signed cast)?
+ EVT VT = N->getValueType(0);
+ if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
+ !DAG.getTarget().Options.NoSignedZerosFPMath)
+ return SDValue();
+
+ // fptosi/fptoui round towards zero, so converting from FP to integer and
+ // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
+ SDValue N0 = N->getOperand(0);
+ if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
+ N0.getOperand(0).getValueType() == VT)
+ return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
+
+ if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
+ N0.getOperand(0).getValueType() == VT)
+ return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
+
+ return SDValue();
+}
+
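
A source-level view (not from the patch; the names are made up) of the fold performed by
foldFPToIntToFP: a float -> int -> float round trip rounds toward zero, which is exactly
what ftrunc computes, provided the inner cast does not overflow and -0.0 may be treated
as +0.0. Those are the cases guarded by the function attribute and the no-signed-zeros
check above.

    #include <cmath>

    double roundtrip(double x) {
      return static_cast<double>(static_cast<long long>(x));  // becomes trunc(x)
    }

    double reference(double x) { return std::trunc(x); }
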
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -10673,6 +11420,9 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
}
}
+ if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
+ return FTrunc;
+
return SDValue();
}
@@ -10712,6 +11462,9 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
}
}
+ if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
+ return FTrunc;
+
return SDValue();
}
@@ -11118,16 +11871,22 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
N1.getOperand(0), N1.getOperand(1), N2);
}
- if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
- ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
- (N1.getOperand(0).hasOneUse() &&
- N1.getOperand(0).getOpcode() == ISD::SRL))) {
- SDNode *Trunc = nullptr;
- if (N1.getOpcode() == ISD::TRUNCATE) {
- // Look pass the truncate.
- Trunc = N1.getNode();
- N1 = N1.getOperand(0);
- }
+ if (N1.hasOneUse()) {
+ if (SDValue NewN1 = rebuildSetCC(N1))
+ return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::rebuildSetCC(SDValue N) {
+ if (N.getOpcode() == ISD::SRL ||
+ (N.getOpcode() == ISD::TRUNCATE &&
+ (N.getOperand(0).hasOneUse() &&
+ N.getOperand(0).getOpcode() == ISD::SRL))) {
+ // Look past the truncate.
+ if (N.getOpcode() == ISD::TRUNCATE)
+ N = N.getOperand(0);
// Match this pattern so that we can generate simpler code:
//
@@ -11146,74 +11905,55 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
// This applies only when the AND constant value has one bit set and the
// SRL constant is equal to the log2 of the AND constant. The back-end is
// smart enough to convert the result into a TEST/JMP sequence.
- SDValue Op0 = N1.getOperand(0);
- SDValue Op1 = N1.getOperand(1);
+ SDValue Op0 = N.getOperand(0);
+ SDValue Op1 = N.getOperand(1);
- if (Op0.getOpcode() == ISD::AND &&
- Op1.getOpcode() == ISD::Constant) {
+ if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
SDValue AndOp1 = Op0.getOperand(1);
if (AndOp1.getOpcode() == ISD::Constant) {
const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
if (AndConst.isPowerOf2() &&
- cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
+ cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
SDLoc DL(N);
- SDValue SetCC =
- DAG.getSetCC(DL,
- getSetCCResultType(Op0.getValueType()),
- Op0, DAG.getConstant(0, DL, Op0.getValueType()),
- ISD::SETNE);
-
- SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL,
- MVT::Other, Chain, SetCC, N2);
- // Don't add the new BRCond into the worklist or else SimplifySelectCC
- // will convert it back to (X & C1) >> C2.
- CombineTo(N, NewBRCond, false);
- // Truncate is dead.
- if (Trunc)
- deleteAndRecombine(Trunc);
- // Replace the uses of SRL with SETCC
- WorklistRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
- deleteAndRecombine(N1.getNode());
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
+ Op0, DAG.getConstant(0, DL, Op0.getValueType()),
+ ISD::SETNE);
}
}
}
-
- if (Trunc)
- // Restore N1 if the above transformation doesn't match.
- N1 = N->getOperand(1);
}
// Transform br(xor(x, y)) -> br(x != y)
// Transform br(xor(xor(x,y), 1)) -> br (x == y)
- if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
- SDNode *TheXor = N1.getNode();
+ if (N.getOpcode() == ISD::XOR) {
+ // Because we may call this on a speculatively constructed
+ // SimplifiedSetCC Node, we need to simplify this node first.
+ // Ideally this should be folded into SimplifySetCC and not
+ // here. For now, grab a handle to N so we don't lose it from
+ // replacements internal to the visit.
+ HandleSDNode XORHandle(N);
+ while (N.getOpcode() == ISD::XOR) {
+ SDValue Tmp = visitXOR(N.getNode());
+ // No simplification done.
+ if (!Tmp.getNode())
+ break;
+ // Returning N is a form of in-visit replacement that may have
+ // invalidated N. Grab the value from the handle.
+ if (Tmp.getNode() == N.getNode())
+ N = XORHandle.getValue();
+ else // Node simplified. Try simplifying again.
+ N = Tmp;
+ }
+
+ if (N.getOpcode() != ISD::XOR)
+ return N;
+
+ SDNode *TheXor = N.getNode();
+
SDValue Op0 = TheXor->getOperand(0);
SDValue Op1 = TheXor->getOperand(1);
- if (Op0.getOpcode() == Op1.getOpcode()) {
- // Avoid missing important xor optimizations.
- if (SDValue Tmp = visitXOR(TheXor)) {
- if (Tmp.getNode() != TheXor) {
- DEBUG(dbgs() << "\nReplacing.8 ";
- TheXor->dump(&DAG);
- dbgs() << "\nWith: ";
- Tmp.getNode()->dump(&DAG);
- dbgs() << '\n');
- WorklistRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
- deleteAndRecombine(TheXor);
- return DAG.getNode(ISD::BRCOND, SDLoc(N),
- MVT::Other, Chain, Tmp, N2);
- }
-
- // visitXOR has changed XOR's operands or replaced the XOR completely,
- // bail out.
- return SDValue(N, 0);
- }
- }
if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
bool Equal = false;
@@ -11223,19 +11963,12 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
Equal = true;
}
- EVT SetCCVT = N1.getValueType();
+ EVT SetCCVT = N.getValueType();
if (LegalTypes)
SetCCVT = getSetCCResultType(SetCCVT);
- SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
- SetCCVT,
- Op0, Op1,
- Equal ? ISD::SETEQ : ISD::SETNE);
// Replace the uses of XOR with SETCC
- WorklistRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
- deleteAndRecombine(N1.getNode());
- return DAG.getNode(ISD::BRCOND, SDLoc(N),
- MVT::Other, Chain, SetCC, N2);
+ return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
+ Equal ? ISD::SETEQ : ISD::SETNE);
}
}
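
A hypothetical source-level example (not part of the patch) of the branch pattern that
rebuildSetCC recognizes: a single-bit test expressed as a shift of an AND is rebuilt as a
compare against zero, which back-ends typically lower to a TEST/JMP or bit-test sequence.

    #include <cstdint>

    int branch_on_bit(uint32_t x) {
      if ((x & 0x8) >> 3)  // rebuilt as: if ((x & 0x8) != 0)
        return 1;
      return 0;
    }
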
@@ -11467,11 +12200,8 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
BasePtr, Offset, AM);
++PreIndexedNodes;
++NodesCombined;
- DEBUG(dbgs() << "\nReplacing.4 ";
- N->dump(&DAG);
- dbgs() << "\nWith: ";
- Result.getNode()->dump(&DAG);
- dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
+ Result.getNode()->dump(&DAG); dbgs() << '\n');
WorklistRemover DeadNodes(*this);
if (isLoad) {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
@@ -11636,11 +12366,9 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
BasePtr, Offset, AM);
++PostIndexedNodes;
++NodesCombined;
- DEBUG(dbgs() << "\nReplacing.5 ";
- N->dump(&DAG);
- dbgs() << "\nWith: ";
- Result.getNode()->dump(&DAG);
- dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
+ dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
+ dbgs() << '\n');
WorklistRemover DeadNodes(*this);
if (isLoad) {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
@@ -11664,7 +12392,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
return false;
}
-/// \brief Return the base-pointer arithmetic from an indexed \p LD.
+/// Return the base-pointer arithmetic from an indexed \p LD.
SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
ISD::MemIndexedMode AM = LD->getAddressingMode();
assert(AM != ISD::UNINDEXED);
@@ -11706,11 +12434,9 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
// v3 = add v2, c
// Now we replace use of chain2 with chain1. This makes the second load
// isomorphic to the one we are deleting, and thus makes this load live.
- DEBUG(dbgs() << "\nReplacing.6 ";
- N->dump(&DAG);
- dbgs() << "\nWith chain: ";
- Chain.getNode()->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
+ dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
+ dbgs() << "\n");
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
AddUsersToWorklist(Chain.getNode());
@@ -11741,11 +12467,9 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
AddUsersToWorklist(N);
} else
Index = DAG.getUNDEF(N->getValueType(1));
- DEBUG(dbgs() << "\nReplacing.7 ";
- N->dump(&DAG);
- dbgs() << "\nWith: ";
- Undef.getNode()->dump(&DAG);
- dbgs() << " and 2 other values\n");
+ LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
+ dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
+ dbgs() << " and 2 other values\n");
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
@@ -11773,13 +12497,14 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
// Try to infer better alignment information than the load already has.
if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
- if (Align > LD->getMemOperand()->getBaseAlignment()) {
+ if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
SDValue NewLoad = DAG.getExtLoad(
LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
LD->getPointerInfo(), LD->getMemoryVT(), Align,
LD->getMemOperand()->getFlags(), LD->getAAInfo());
- if (NewLoad.getNode() != N)
- return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
+ // NewLoad will always be N as we are only refining the alignment
+ assert(NewLoad.getNode() == N);
+ (void)NewLoad;
}
}
}
@@ -11826,7 +12551,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
namespace {
-/// \brief Helper structure used to slice a load in smaller loads.
+/// Helper structure used to slice a load in smaller loads.
/// Basically a slice is obtained from the following sequence:
/// Origin = load Ty1, Base
/// Shift = srl Ty1 Origin, CstTy Amount
@@ -11839,7 +12564,7 @@ namespace {
/// SliceTy is deduced from the number of bits that are actually used to
/// build Inst.
struct LoadedSlice {
- /// \brief Helper structure used to compute the cost of a slice.
+ /// Helper structure used to compute the cost of a slice.
struct Cost {
/// Are we optimizing for code size.
bool ForCodeSize;
@@ -11853,7 +12578,7 @@ struct LoadedSlice {
Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {}
- /// \brief Get the cost of one isolated slice.
+ /// Get the cost of one isolated slice.
Cost(const LoadedSlice &LS, bool ForCodeSize = false)
: ForCodeSize(ForCodeSize), Loads(1) {
EVT TruncType = LS.Inst->getValueType(0);
@@ -11863,7 +12588,7 @@ struct LoadedSlice {
ZExts = 1;
}
- /// \brief Account for slicing gain in the current cost.
+ /// Account for slicing gain in the current cost.
/// Slicing provides a few gains, like removing a shift or a
/// truncate. This method allows the cost of the original
/// load to grow with the gain from this slice.
@@ -11936,7 +12661,7 @@ struct LoadedSlice {
unsigned Shift = 0, SelectionDAG *DAG = nullptr)
: Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
- /// \brief Get the bits used in a chunk of bits \p BitWidth large.
+ /// Get the bits used in a chunk of bits \p BitWidth large.
/// \return Result is \p BitWidth and has used bits set to 1 and
/// not used bits set to 0.
APInt getUsedBits() const {
@@ -11956,14 +12681,14 @@ struct LoadedSlice {
return UsedBits;
}
- /// \brief Get the size of the slice to be loaded in bytes.
+ /// Get the size of the slice to be loaded in bytes.
unsigned getLoadedSize() const {
unsigned SliceSize = getUsedBits().countPopulation();
assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
return SliceSize / 8;
}
- /// \brief Get the type that will be loaded for this slice.
+ /// Get the type that will be loaded for this slice.
/// Note: This may not be the final type for the slice.
EVT getLoadedType() const {
assert(DAG && "Missing context");
@@ -11971,7 +12696,7 @@ struct LoadedSlice {
return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
}
- /// \brief Get the alignment of the load used for this slice.
+ /// Get the alignment of the load used for this slice.
unsigned getAlignment() const {
unsigned Alignment = Origin->getAlignment();
unsigned Offset = getOffsetFromBase();
@@ -11980,7 +12705,7 @@ struct LoadedSlice {
return Alignment;
}
- /// \brief Check if this slice can be rewritten with legal operations.
+ /// Check if this slice can be rewritten with legal operations.
bool isLegal() const {
// An invalid slice is not legal.
if (!Origin || !Inst || !DAG)
@@ -12024,7 +12749,7 @@ struct LoadedSlice {
return true;
}
- /// \brief Get the offset in bytes of this slice in the original chunk of
+ /// Get the offset in bytes of this slice in the original chunk of
/// bits.
/// \pre DAG != nullptr.
uint64_t getOffsetFromBase() const {
@@ -12045,7 +12770,7 @@ struct LoadedSlice {
return Offset;
}
- /// \brief Generate the sequence of instructions to load the slice
+ /// Generate the sequence of instructions to load the slice
/// represented by this object and redirect the uses of this slice to
/// this new sequence of instructions.
/// \pre this->Inst && this->Origin are valid Instructions and this
@@ -12083,7 +12808,7 @@ struct LoadedSlice {
return LastInst;
}
- /// \brief Check if this slice can be merged with an expensive cross register
+ /// Check if this slice can be merged with an expensive cross register
/// bank copy. E.g.,
/// i = load i32
/// f = bitcast i32 i to float
@@ -12132,7 +12857,7 @@ struct LoadedSlice {
} // end anonymous namespace
-/// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
+/// Check that all bits set in \p UsedBits form a dense region, i.e.,
/// \p UsedBits looks like 0..0 1..1 0..0.
static bool areUsedBitsDense(const APInt &UsedBits) {
// If all the bits are one, this is dense!
@@ -12148,7 +12873,7 @@ static bool areUsedBitsDense(const APInt &UsedBits) {
return NarrowedUsedBits.isAllOnesValue();
}
-/// \brief Check whether or not \p First and \p Second are next to each other
+/// Check whether or not \p First and \p Second are next to each other
/// in memory. This means that there is no hole between the bits loaded
/// by \p First and the bits loaded by \p Second.
static bool areSlicesNextToEachOther(const LoadedSlice &First,
@@ -12162,7 +12887,7 @@ static bool areSlicesNextToEachOther(const LoadedSlice &First,
return areUsedBitsDense(UsedBits);
}
-/// \brief Adjust the \p GlobalLSCost according to the target
+/// Adjust the \p GlobalLSCost according to the target
/// pairing capabilities and the layout of the slices.
/// \pre \p GlobalLSCost should account for at least as many loads as
/// there is in the slices in \p LoadedSlices.
@@ -12175,8 +12900,8 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
// Sort the slices so that elements that are likely to be next to each
// other in memory are next to each other in the list.
- std::sort(LoadedSlices.begin(), LoadedSlices.end(),
- [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
+ llvm::sort(LoadedSlices.begin(), LoadedSlices.end(),
+ [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
});
@@ -12223,7 +12948,7 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
}
}
-/// \brief Check the profitability of all involved LoadedSlice.
+/// Check the profitability of all involved LoadedSlice.
/// Currently, it is considered profitable if there are exactly two
/// involved slices (1) which are (2) next to each other in memory, and
/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
@@ -12267,7 +12992,7 @@ static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
return OrigCost > GlobalSlicingCost;
}
-/// \brief If the given load, \p LI, is used only by trunc or trunc(lshr)
+/// If the given load, \p LI, is used only by trunc or trunc(lshr)
/// operations, split it in the various pieces being extracted.
///
/// This sort of thing is introduced by SROA.
@@ -12386,22 +13111,6 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
- // The store should be chained directly to the load or be an operand of a
- // tokenfactor.
- if (LD == Chain.getNode())
- ; // ok.
- else if (Chain->getOpcode() != ISD::TokenFactor)
- return Result; // Fail.
- else {
- bool isOk = false;
- for (const SDValue &ChainOp : Chain->op_values())
- if (ChainOp.getNode() == LD) {
- isOk = true;
- break;
- }
- if (!isOk) return Result;
- }
-
// This only handles simple types.
if (V.getValueType() != MVT::i16 &&
V.getValueType() != MVT::i32 &&
@@ -12438,6 +13147,24 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
// is aligned the same as the access width.
if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
+ // For narrowing to be valid, it must be the case that the load is the
+ // memory operation immediately preceding the store.
+ if (LD == Chain.getNode())
+ ; // ok.
+ else if (Chain->getOpcode() == ISD::TokenFactor &&
+ SDValue(LD, 1).hasOneUse()) {
+ // LD has only 1 chain use so there are no indirect dependencies.
+ bool isOk = false;
+ for (const SDValue &ChainOp : Chain->op_values())
+ if (ChainOp.getNode() == LD) {
+ isOk = true;
+ break;
+ }
+ if (!isOk)
+ return Result;
+ } else
+ return Result; // Fail.
+
Result.first = MaskedBytes;
Result.second = NotMaskTZ/8;
return Result;
@@ -12756,12 +13483,6 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
return false;
}
-static SDValue peekThroughBitcast(SDValue V) {
- while (V.getOpcode() == ISD::BITCAST)
- V = V.getOperand(0);
- return V;
-}
-
SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
unsigned NumStores) {
SmallVector<SDValue, 8> Chains;
@@ -12886,6 +13607,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
SDValue Val = St->getValue();
+ Val = peekThroughBitcast(Val);
StoreInt <<= ElementSizeBits;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
StoreInt |= C->getAPIntValue()
@@ -12918,13 +13640,13 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
FirstInChain->getPointerInfo(),
FirstInChain->getAlignment());
} else { // Must be realized as a trunc store
- EVT LegalizedStoredValueTy =
+ EVT LegalizedStoredValTy =
TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
- unsigned LegalizedStoreSize = LegalizedStoredValueTy.getSizeInBits();
+ unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
SDValue ExtendedStoreVal =
DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
- LegalizedStoredValueTy);
+ LegalizedStoredValTy);
NewStore = DAG.getTruncStore(
NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
@@ -12941,7 +13663,8 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
}
void DAGCombiner::getStoreMergeCandidates(
- StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) {
+ StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
+ SDNode *&RootNode) {
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
@@ -12970,6 +13693,12 @@ void DAGCombiner::getStoreMergeCandidates(
// Load and store should be the same type.
if (MemVT != LoadVT)
return;
+ // Loads must only have one use.
+ if (!Ld->hasNUsesOfValue(1, 0))
+ return;
+ // The memory operands must not be volatile.
+ if (Ld->isVolatile() || Ld->isIndexed())
+ return;
}
auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
int64_t &Offset) -> bool {
@@ -12987,6 +13716,12 @@ void DAGCombiner::getStoreMergeCandidates(
auto LPtr = BaseIndexOffset::match(OtherLd, DAG);
if (LoadVT != OtherLd->getMemoryVT())
return false;
+ // Loads must only have one use.
+ if (!OtherLd->hasNUsesOfValue(1, 0))
+ return false;
+ // The memory operands must not be volatile.
+ if (OtherLd->isVolatile() || OtherLd->isIndexed())
+ return false;
if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
return false;
} else
@@ -13028,7 +13763,7 @@ void DAGCombiner::getStoreMergeCandidates(
// FIXME: We should be able to climb and
// descend TokenFactors to find candidates as well.
- SDNode *RootNode = (St->getChain()).getNode();
+ RootNode = St->getChain().getNode();
if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
RootNode = Ldn->getChain().getNode();
@@ -13059,31 +13794,54 @@ void DAGCombiner::getStoreMergeCandidates(
// through the chain). Check in parallel by searching up from
// non-chain operands of candidates.
bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
- SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores) {
+ SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
+ SDNode *RootNode) {
// FIXME: We should be able to truncate a full search of
// predecessors by doing a BFS and keeping tabs on the originating
// stores from which worklist nodes come, in a similar way to
// TokenFactor simplification.
- SmallPtrSet<const SDNode *, 16> Visited;
+ SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 8> Worklist;
- unsigned int Max = 8192;
+
+ // RootNode is a predecessor to all candidates so we need not search
+ // past it. Add RootNode (peeking through TokenFactors). Do not count
+ // these towards size check.
+
+ Worklist.push_back(RootNode);
+ while (!Worklist.empty()) {
+ auto N = Worklist.pop_back_val();
+ if (N->getOpcode() == ISD::TokenFactor) {
+ for (SDValue Op : N->ops())
+ Worklist.push_back(Op.getNode());
+ }
+ Visited.insert(N);
+ }
+
+ // Don't count pruning nodes towards max.
+ unsigned int Max = 1024 + Visited.size();
// Search Ops of store candidates.
for (unsigned i = 0; i < NumStores; ++i) {
- SDNode *n = StoreNodes[i].MemNode;
- // Potential loops may happen only through non-chain operands
- for (unsigned j = 1; j < n->getNumOperands(); ++j)
- Worklist.push_back(n->getOperand(j).getNode());
+ SDNode *N = StoreNodes[i].MemNode;
+ // Of the 4 Store Operands:
+ // * Chain (Op 0) -> We have already considered these
+ // in candidate selection and can be
+ // safely ignored
+ // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
+ // * Address (Op 2) -> Merged addresses may only vary by a fixed constant
+ // and so no cycles are possible.
+ // * (Op 3) -> appears to always be undef. Cannot be source of cycle.
+ //
+ // Thus we need only check predecessors of the value operands.
+ auto *Op = N->getOperand(1).getNode();
+ if (Visited.insert(Op).second)
+ Worklist.push_back(Op);
}
// Search through DAG. We can stop early if we find a store node.
- for (unsigned i = 0; i < NumStores; ++i) {
+ for (unsigned i = 0; i < NumStores; ++i)
if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
Max))
return false;
- // Check if we ended early, failing conservatively if so.
- if (Visited.size() >= Max)
- return false;
- }
return true;
}
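The pruning scheme in the hunk above (seed the visited set with the root's TokenFactor closure, then walk only from the stores' value operands) can be illustrated with a small standalone sketch. The Node type and wouldMergeCreateCycle helper below are simplified, hypothetical stand-ins, not LLVM's SDNode API:

#include <cstddef>
#include <unordered_set>
#include <vector>

// Simplified stand-in for an SDNode: a kind plus operand edges.
struct Node {
  enum NodeKind { TokenFactor, Store, Load, Other };
  NodeKind Kind;
  std::vector<Node *> Ops; // operands, i.e. predecessors in the DAG
};

// Returns true if any candidate store is reachable from another candidate's
// value operand, i.e. merging the stores would create a cycle.
static bool wouldMergeCreateCycle(const std::vector<Node *> &Stores, Node *Root,
                                  std::size_t MaxSteps = 1024) {
  std::unordered_set<const Node *> Visited;
  std::vector<const Node *> Worklist;

  // Seed Visited with Root, peeking through TokenFactors, so the later
  // search stops at the shared chain instead of walking the whole DAG.
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    const Node *N = Worklist.back();
    Worklist.pop_back();
    if (N->Kind == Node::TokenFactor)
      for (Node *Op : N->Ops)
        Worklist.push_back(Op);
    Visited.insert(N);
  }

  // The pruning nodes above do not count against the step budget.
  std::size_t Budget = MaxSteps + Visited.size();

  // Only value operands (operand 1) can introduce a cycle: the chains were
  // vetted during candidate selection, and merged addresses differ only by
  // a constant.
  for (Node *St : Stores)
    if (St->Ops.size() > 1 && Visited.insert(St->Ops[1]).second)
      Worklist.push_back(St->Ops[1]);

  // Walk predecessors; hitting a candidate store means a dependency cycle,
  // and exhausting the budget fails conservatively.
  std::unordered_set<const Node *> Candidates(Stores.begin(), Stores.end());
  while (!Worklist.empty()) {
    const Node *N = Worklist.back();
    Worklist.pop_back();
    if (Candidates.count(N))
      return true;
    if (Visited.size() > Budget)
      return true;
    for (Node *Op : N->Ops)
      if (Visited.insert(Op).second)
        Worklist.push_back(Op);
  }
  return false;
}

int main() {
  Node Root{Node::Other, {}};
  Node Ld{Node::Load, {&Root}};
  Node StA{Node::Store, {&Root, &Ld}}; // value feeds from the load
  Node StB{Node::Store, {&Root, &Root}};
  std::vector<Node *> Stores{&StA, &StB};
  return wouldMergeCreateCycle(Stores, &Root) ? 1 : 0;
}

Counting the pruning nodes separately keeps the step budget meaningful regardless of how wide the shared chain is, which mirrors the "Don't count pruning nodes towards max" comment above.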
@@ -13121,8 +13879,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
return false;
SmallVector<MemOpLink, 8> StoreNodes;
+ SDNode *RootNode;
// Find potential store merge candidates by searching through chain sub-DAG
- getStoreMergeCandidates(St, StoreNodes);
+ getStoreMergeCandidates(St, StoreNodes, RootNode);
// Check if there is anything to merge.
if (StoreNodes.size() < 2)
@@ -13130,10 +13889,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
// Sort the memory operands according to their distance from the
// base pointer.
- std::sort(StoreNodes.begin(), StoreNodes.end(),
- [](MemOpLink LHS, MemOpLink RHS) {
- return LHS.OffsetFromBase < RHS.OffsetFromBase;
- });
+ llvm::sort(StoreNodes.begin(), StoreNodes.end(),
+ [](MemOpLink LHS, MemOpLink RHS) {
+ return LHS.OffsetFromBase < RHS.OffsetFromBase;
+ });
// Store Merge attempts to merge the lowest stores. This generally
// works out as if successful, as the remaining stores are checked
@@ -13177,178 +13936,191 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
continue;
}
- // Check that we can merge these candidates without causing a cycle
- if (!checkMergeStoreCandidatesForDependencies(StoreNodes,
- NumConsecutiveStores)) {
- StoreNodes.erase(StoreNodes.begin(),
- StoreNodes.begin() + NumConsecutiveStores);
- continue;
- }
-
// The node with the lowest store address.
LLVMContext &Context = *DAG.getContext();
const DataLayout &DL = DAG.getDataLayout();
// Store the constants into memory as one consecutive store.
if (IsConstantSrc) {
- LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
- unsigned FirstStoreAS = FirstInChain->getAddressSpace();
- unsigned FirstStoreAlign = FirstInChain->getAlignment();
- unsigned LastLegalType = 1;
- unsigned LastLegalVectorType = 1;
- bool LastIntegerTrunc = false;
- bool NonZero = false;
- unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
- for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
- StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
- SDValue StoredVal = ST->getValue();
- bool IsElementZero = false;
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
- IsElementZero = C->isNullValue();
- else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
- IsElementZero = C->getConstantFPValue()->isNullValue();
- if (IsElementZero) {
- if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
- FirstZeroAfterNonZero = i;
- }
- NonZero |= !IsElementZero;
+ while (NumConsecutiveStores >= 2) {
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ unsigned FirstStoreAlign = FirstInChain->getAlignment();
+ unsigned LastLegalType = 1;
+ unsigned LastLegalVectorType = 1;
+ bool LastIntegerTrunc = false;
+ bool NonZero = false;
+ unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
+ for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
+ StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ SDValue StoredVal = ST->getValue();
+ bool IsElementZero = false;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
+ IsElementZero = C->isNullValue();
+ else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
+ IsElementZero = C->getConstantFPValue()->isNullValue();
+ if (IsElementZero) {
+ if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
+ FirstZeroAfterNonZero = i;
+ }
+ NonZero |= !IsElementZero;
- // Find a legal type for the constant store.
- unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
- EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
- bool IsFast = false;
- if (TLI.isTypeLegal(StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
- FirstStoreAlign, &IsFast) &&
- IsFast) {
- LastIntegerTrunc = false;
- LastLegalType = i + 1;
- // Or check whether a truncstore is legal.
- } else if (TLI.getTypeAction(Context, StoreTy) ==
- TargetLowering::TypePromoteInteger) {
- EVT LegalizedStoredValueTy =
- TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
- if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) &&
+ // Find a legal type for the constant store.
+ unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
+ EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
+ bool IsFast = false;
+
+ // Break early when size is too large to be legal.
+ if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
+ break;
+
+ if (TLI.isTypeLegal(StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
FirstStoreAlign, &IsFast) &&
IsFast) {
- LastIntegerTrunc = true;
+ LastIntegerTrunc = false;
LastLegalType = i + 1;
+ // Or check whether a truncstore is legal.
+ } else if (TLI.getTypeAction(Context, StoreTy) ==
+ TargetLowering::TypePromoteInteger) {
+ EVT LegalizedStoredValTy =
+ TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
+ if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
+ FirstStoreAlign, &IsFast) &&
+ IsFast) {
+ LastIntegerTrunc = true;
+ LastLegalType = i + 1;
+ }
}
- }
- // We only use vectors if the constant is known to be zero or the target
- // allows it and the function is not marked with the noimplicitfloat
- // attribute.
- if ((!NonZero ||
- TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
- !NoVectors) {
- // Find a legal type for the vector store.
- unsigned Elts = (i + 1) * NumMemElts;
- EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
- if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
- TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
- FirstStoreAlign, &IsFast) &&
- IsFast)
- LastLegalVectorType = i + 1;
+ // We only use vectors if the constant is known to be zero or the
+ // target allows it and the function is not marked with the
+ // noimplicitfloat attribute.
+ if ((!NonZero ||
+ TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
+ !NoVectors) {
+ // Find a legal type for the vector store.
+ unsigned Elts = (i + 1) * NumMemElts;
+ EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
+ if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
+ TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
+ TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
+ FirstStoreAlign, &IsFast) &&
+ IsFast)
+ LastLegalVectorType = i + 1;
+ }
}
- }
- bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
- unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
+ bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
+ unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
+
+ // Check if we found a legal integer type that creates a meaningful
+ // merge.
+ if (NumElem < 2) {
+ // We know that candidate stores are in order and of correct
+ // shape. While there is no mergeable sequence from the
+ // beginning one may start later in the sequence. The only
+ // reason a merge of size N could have failed where another of
+ // the same size would not have, is if the alignment has
+ // improved or we've dropped a non-zero value. Drop as many
+ // candidates as we can here.
+ unsigned NumSkip = 1;
+ while (
+ (NumSkip < NumConsecutiveStores) &&
+ (NumSkip < FirstZeroAfterNonZero) &&
+ (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
+ NumSkip++;
+
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
+ NumConsecutiveStores -= NumSkip;
+ continue;
+ }
- // Check if we found a legal integer type that creates a meaningful merge.
- if (NumElem < 2) {
- // We know that candidate stores are in order and of correct
- // shape. While there is no mergeable sequence from the
- // beginning one may start later in the sequence. The only
- // reason a merge of size N could have failed where another of
- // the same size would not have, is if the alignment has
- // improved or we've dropped a non-zero value. Drop as many
- // candidates as we can here.
- unsigned NumSkip = 1;
- while (
- (NumSkip < NumConsecutiveStores) &&
- (NumSkip < FirstZeroAfterNonZero) &&
- (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) {
- NumSkip++;
+ // Check that we can merge these candidates without causing a cycle.
+ if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
+ RootNode)) {
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ NumConsecutiveStores -= NumElem;
+ continue;
}
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
- continue;
- }
- bool Merged = MergeStoresOfConstantsOrVecElts(
- StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc);
- RV |= Merged;
+ RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true,
+ UseVector, LastIntegerTrunc);
- // Remove merged stores for next iteration.
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ // Remove merged stores for next iteration.
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ NumConsecutiveStores -= NumElem;
+ }
continue;
}
// When extracting multiple vector elements, try to store them
// in one vector store rather than a sequence of scalar stores.
if (IsExtractVecSrc) {
- LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
- unsigned FirstStoreAS = FirstInChain->getAddressSpace();
- unsigned FirstStoreAlign = FirstInChain->getAlignment();
- unsigned NumStoresToMerge = 1;
- for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
- StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- SDValue StVal = peekThroughBitcast(St->getValue());
- // This restriction could be loosened.
- // Bail out if any stored values are not elements extracted from a
- // vector. It should be possible to handle mixed sources, but load
- // sources need more careful handling (see the block of code below that
- // handles consecutive loads).
- if (StVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
- StVal.getOpcode() != ISD::EXTRACT_SUBVECTOR)
- return RV;
+ // Loop on Consecutive Stores on success.
+ while (NumConsecutiveStores >= 2) {
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ unsigned FirstStoreAlign = FirstInChain->getAlignment();
+ unsigned NumStoresToMerge = 1;
+ for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
+ // Find a legal type for the vector store.
+ unsigned Elts = (i + 1) * NumMemElts;
+ EVT Ty =
+ EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
+ bool IsFast;
- // Find a legal type for the vector store.
- unsigned Elts = (i + 1) * NumMemElts;
- EVT Ty =
- EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
- bool IsFast;
- if (TLI.isTypeLegal(Ty) &&
- TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
- FirstStoreAlign, &IsFast) &&
- IsFast)
- NumStoresToMerge = i + 1;
- }
+ // Break early when size is too large to be legal.
+ if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
+ break;
- // Check if we found a legal integer type that creates a meaningful merge.
- if (NumStoresToMerge < 2) {
- // We know that candidate stores are in order and of correct
- // shape. While there is no mergeable sequence from the
- // beginning one may start later in the sequence. The only
- // reason a merge of size N could have failed where another of
- // the same size would not have, is if the alignment has
- // improved. Drop as many candidates as we can here.
- unsigned NumSkip = 1;
- while ((NumSkip < NumConsecutiveStores) &&
- (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
- NumSkip++;
+ if (TLI.isTypeLegal(Ty) &&
+ TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
+ TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
+ FirstStoreAlign, &IsFast) &&
+ IsFast)
+ NumStoresToMerge = i + 1;
+ }
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
- continue;
- }
+ // Check if we found a legal integer type creating a meaningful
+ // merge.
+ if (NumStoresToMerge < 2) {
+ // We know that candidate stores are in order and of correct
+ // shape. While there is no mergeable sequence from the
+ // beginning one may start later in the sequence. The only
+ // reason a merge of size N could have failed where another of
+ // the same size would not have, is if the alignment has
+ // improved. Drop as many candidates as we can here.
+ unsigned NumSkip = 1;
+ while (
+ (NumSkip < NumConsecutiveStores) &&
+ (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
+ NumSkip++;
+
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
+ NumConsecutiveStores -= NumSkip;
+ continue;
+ }
+
+ // Check that we can merge these candidates without causing a cycle.
+ if (!checkMergeStoreCandidatesForDependencies(
+ StoreNodes, NumStoresToMerge, RootNode)) {
+ StoreNodes.erase(StoreNodes.begin(),
+ StoreNodes.begin() + NumStoresToMerge);
+ NumConsecutiveStores -= NumStoresToMerge;
+ continue;
+ }
+
+ RV |= MergeStoresOfConstantsOrVecElts(
+ StoreNodes, MemVT, NumStoresToMerge, false, true, false);
- bool Merged = MergeStoresOfConstantsOrVecElts(
- StoreNodes, MemVT, NumStoresToMerge, false, true, false);
- if (!Merged) {
StoreNodes.erase(StoreNodes.begin(),
StoreNodes.begin() + NumStoresToMerge);
- continue;
+ NumConsecutiveStores -= NumStoresToMerge;
}
- // Remove merged stores for next iteration.
- StoreNodes.erase(StoreNodes.begin(),
- StoreNodes.begin() + NumStoresToMerge);
- RV = true;
continue;
}
@@ -13362,24 +14134,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
// Find acceptable loads. Loads need to have the same chain (token factor),
// must not be zext, volatile, indexed, and they must be consecutive.
BaseIndexOffset LdBasePtr;
+
for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
SDValue Val = peekThroughBitcast(St->getValue());
- LoadSDNode *Ld = dyn_cast<LoadSDNode>(Val);
- if (!Ld)
- break;
-
- // Loads must only have one use.
- if (!Ld->hasNUsesOfValue(1, 0))
- break;
-
- // The memory operands must not be volatile.
- if (Ld->isVolatile() || Ld->isIndexed())
- break;
-
- // The stored memory type must be the same.
- if (Ld->getMemoryVT() != MemVT)
- break;
+ LoadSDNode *Ld = cast<LoadSDNode>(Val);
BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
// If this is not the first ptr that we check.
@@ -13397,90 +14156,75 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
LoadNodes.push_back(MemOpLink(Ld, LdOffset));
}
- if (LoadNodes.size() < 2) {
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
- continue;
- }
+ while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
+ // If we have load/store pair instructions and we only have two values,
+ // don't bother merging.
+ unsigned RequiredAlignment;
+ if (LoadNodes.size() == 2 &&
+ TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
+ StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
+ LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
+ break;
+ }
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ unsigned FirstStoreAlign = FirstInChain->getAlignment();
+ LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
+ unsigned FirstLoadAS = FirstLoad->getAddressSpace();
+ unsigned FirstLoadAlign = FirstLoad->getAlignment();
- // If we have load/store pair instructions and we only have two values,
- // don't bother merging.
- unsigned RequiredAlignment;
- if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
- StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
- continue;
- }
- LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
- unsigned FirstStoreAS = FirstInChain->getAddressSpace();
- unsigned FirstStoreAlign = FirstInChain->getAlignment();
- LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
- unsigned FirstLoadAS = FirstLoad->getAddressSpace();
- unsigned FirstLoadAlign = FirstLoad->getAlignment();
+ // Scan the memory operations on the chain and find the first
+ // non-consecutive load memory address. These variables hold the index in
+ // the store node array.
- // Scan the memory operations on the chain and find the first
- // non-consecutive load memory address. These variables hold the index in
- // the store node array.
- unsigned LastConsecutiveLoad = 1;
- // This variable refers to the size and not index in the array.
- unsigned LastLegalVectorType = 1;
- unsigned LastLegalIntegerType = 1;
- bool isDereferenceable = true;
- bool DoIntegerTruncate = false;
- StartAddress = LoadNodes[0].OffsetFromBase;
- SDValue FirstChain = FirstLoad->getChain();
- for (unsigned i = 1; i < LoadNodes.size(); ++i) {
- // All loads must share the same chain.
- if (LoadNodes[i].MemNode->getChain() != FirstChain)
- break;
+ unsigned LastConsecutiveLoad = 1;
- int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
- if (CurrAddress - StartAddress != (ElementSizeBytes * i))
- break;
- LastConsecutiveLoad = i;
-
- if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
- isDereferenceable = false;
-
- // Find a legal type for the vector store.
- unsigned Elts = (i + 1) * NumMemElts;
- EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
-
- bool IsFastSt, IsFastLd;
- if (TLI.isTypeLegal(StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
- FirstStoreAlign, &IsFastSt) &&
- IsFastSt &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
- FirstLoadAlign, &IsFastLd) &&
- IsFastLd) {
- LastLegalVectorType = i + 1;
- }
+ // This variable refers to the size and not index in the array.
+ unsigned LastLegalVectorType = 1;
+ unsigned LastLegalIntegerType = 1;
+ bool isDereferenceable = true;
+ bool DoIntegerTruncate = false;
+ StartAddress = LoadNodes[0].OffsetFromBase;
+ SDValue FirstChain = FirstLoad->getChain();
+ for (unsigned i = 1; i < LoadNodes.size(); ++i) {
+ // All loads must share the same chain.
+ if (LoadNodes[i].MemNode->getChain() != FirstChain)
+ break;
+
+ int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
+ if (CurrAddress - StartAddress != (ElementSizeBytes * i))
+ break;
+ LastConsecutiveLoad = i;
+
+ if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
+ isDereferenceable = false;
+
+ // Find a legal type for the vector store.
+ unsigned Elts = (i + 1) * NumMemElts;
+ EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
+
+ // Break early when size is too large to be legal.
+ if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
+ break;
+
+ bool IsFastSt, IsFastLd;
+ if (TLI.isTypeLegal(StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
+ FirstStoreAlign, &IsFastSt) &&
+ IsFastSt &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
+ FirstLoadAlign, &IsFastLd) &&
+ IsFastLd) {
+ LastLegalVectorType = i + 1;
+ }
- // Find a legal type for the integer store.
- unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
- StoreTy = EVT::getIntegerVT(Context, SizeInBits);
- if (TLI.isTypeLegal(StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
- FirstStoreAlign, &IsFastSt) &&
- IsFastSt &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
- FirstLoadAlign, &IsFastLd) &&
- IsFastLd) {
- LastLegalIntegerType = i + 1;
- DoIntegerTruncate = false;
- // Or check whether a truncstore and extload is legal.
- } else if (TLI.getTypeAction(Context, StoreTy) ==
- TargetLowering::TypePromoteInteger) {
- EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy);
- if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) &&
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy,
- StoreTy) &&
- TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy,
- StoreTy) &&
- TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
+ // Find a legal type for the integer store.
+ unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
+ StoreTy = EVT::getIntegerVT(Context, SizeInBits);
+ if (TLI.isTypeLegal(StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
FirstStoreAlign, &IsFastSt) &&
IsFastSt &&
@@ -13488,105 +14232,140 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
FirstLoadAlign, &IsFastLd) &&
IsFastLd) {
LastLegalIntegerType = i + 1;
- DoIntegerTruncate = true;
+ DoIntegerTruncate = false;
+ // Or check whether a truncstore and extload is legal.
+ } else if (TLI.getTypeAction(Context, StoreTy) ==
+ TargetLowering::TypePromoteInteger) {
+ EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
+ if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy,
+ StoreTy) &&
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
+ StoreTy) &&
+ TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
+ FirstStoreAlign, &IsFastSt) &&
+ IsFastSt &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
+ FirstLoadAlign, &IsFastLd) &&
+ IsFastLd) {
+ LastLegalIntegerType = i + 1;
+ DoIntegerTruncate = true;
+ }
}
}
- }
- // Only use vector types if the vector type is larger than the integer type.
- // If they are the same, use integers.
- bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
- unsigned LastLegalType =
- std::max(LastLegalVectorType, LastLegalIntegerType);
-
- // We add +1 here because the LastXXX variables refer to location while
- // the NumElem refers to array/index size.
- unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
- NumElem = std::min(LastLegalType, NumElem);
-
- if (NumElem < 2) {
- // We know that candidate stores are in order and of correct
- // shape. While there is no mergeable sequence from the
- // beginning one may start later in the sequence. The only
- // reason a merge of size N could have failed where another of
- // the same size would not have is if the alignment or either
- // the load or store has improved. Drop as many candidates as we
- // can here.
- unsigned NumSkip = 1;
- while ((NumSkip < LoadNodes.size()) &&
- (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
- (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
- NumSkip++;
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
- continue;
- }
+ // Only use vector types if the vector type is larger than the integer
+ // type. If they are the same, use integers.
+ bool UseVectorTy =
+ LastLegalVectorType > LastLegalIntegerType && !NoVectors;
+ unsigned LastLegalType =
+ std::max(LastLegalVectorType, LastLegalIntegerType);
- // Find if it is better to use vectors or integers to load and store
- // to memory.
- EVT JointMemOpVT;
- if (UseVectorTy) {
- // Find a legal type for the vector store.
- unsigned Elts = NumElem * NumMemElts;
- JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
- } else {
- unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
- JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
- }
-
- SDLoc LoadDL(LoadNodes[0].MemNode);
- SDLoc StoreDL(StoreNodes[0].MemNode);
-
- // The merged loads are required to have the same incoming chain, so
- // using the first's chain is acceptable.
-
- SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
- AddToWorklist(NewStoreChain.getNode());
-
- MachineMemOperand::Flags MMOFlags = isDereferenceable ?
- MachineMemOperand::MODereferenceable:
- MachineMemOperand::MONone;
-
- SDValue NewLoad, NewStore;
- if (UseVectorTy || !DoIntegerTruncate) {
- NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
- FirstLoad->getBasePtr(),
- FirstLoad->getPointerInfo(), FirstLoadAlign,
- MMOFlags);
- NewStore = DAG.getStore(NewStoreChain, StoreDL, NewLoad,
- FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(), FirstStoreAlign);
- } else { // This must be the truncstore/extload case
- EVT ExtendedTy =
- TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
- NewLoad =
- DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, FirstLoad->getChain(),
- FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
- JointMemOpVT, FirstLoadAlign, MMOFlags);
- NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
- FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(), JointMemOpVT,
- FirstInChain->getAlignment(),
- FirstInChain->getMemOperand()->getFlags());
- }
-
- // Transfer chain users from old loads to the new load.
- for (unsigned i = 0; i < NumElem; ++i) {
- LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
- SDValue(NewLoad.getNode(), 1));
- }
-
- // Replace the all stores with the new store. Recursively remove
- // corresponding value if its no longer used.
- for (unsigned i = 0; i < NumElem; ++i) {
- SDValue Val = StoreNodes[i].MemNode->getOperand(1);
- CombineTo(StoreNodes[i].MemNode, NewStore);
- if (Val.getNode()->use_empty())
- recursivelyDeleteUnusedNodes(Val.getNode());
- }
-
- RV = true;
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ // We add +1 here because the LastXXX variables refer to location while
+ // the NumElem refers to array/index size.
+ unsigned NumElem =
+ std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
+ NumElem = std::min(LastLegalType, NumElem);
+
+ if (NumElem < 2) {
+ // We know that candidate stores are in order and of correct
+ // shape. While there is no mergeable sequence from the
+ // beginning one may start later in the sequence. The only
+ // reason a merge of size N could have failed where another of
+ // the same size would not have is if the alignment or either
+ // the load or store has improved. Drop as many candidates as we
+ // can here.
+ unsigned NumSkip = 1;
+ while ((NumSkip < LoadNodes.size()) &&
+ (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
+ (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
+ NumSkip++;
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
+ LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
+ NumConsecutiveStores -= NumSkip;
+ continue;
+ }
+
+ // Check that we can merge these candidates without causing a cycle.
+ if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
+ RootNode)) {
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
+ NumConsecutiveStores -= NumElem;
+ continue;
+ }
+
+ // Find if it is better to use vectors or integers to load and store
+ // to memory.
+ EVT JointMemOpVT;
+ if (UseVectorTy) {
+ // Find a legal type for the vector store.
+ unsigned Elts = NumElem * NumMemElts;
+ JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
+ } else {
+ unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
+ JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
+ }
+
+ SDLoc LoadDL(LoadNodes[0].MemNode);
+ SDLoc StoreDL(StoreNodes[0].MemNode);
+
+ // The merged loads are required to have the same incoming chain, so
+ // using the first's chain is acceptable.
+
+ SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
+ AddToWorklist(NewStoreChain.getNode());
+
+ MachineMemOperand::Flags MMOFlags =
+ isDereferenceable ? MachineMemOperand::MODereferenceable
+ : MachineMemOperand::MONone;
+
+ SDValue NewLoad, NewStore;
+ if (UseVectorTy || !DoIntegerTruncate) {
+ NewLoad =
+ DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
+ FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
+ FirstLoadAlign, MMOFlags);
+ NewStore = DAG.getStore(
+ NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(), FirstStoreAlign);
+ } else { // This must be the truncstore/extload case
+ EVT ExtendedTy =
+ TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
+ NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
+ FirstLoad->getChain(), FirstLoad->getBasePtr(),
+ FirstLoad->getPointerInfo(), JointMemOpVT,
+ FirstLoadAlign, MMOFlags);
+ NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
+ FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(),
+ JointMemOpVT, FirstInChain->getAlignment(),
+ FirstInChain->getMemOperand()->getFlags());
+ }
+
+ // Transfer chain users from old loads to the new load.
+ for (unsigned i = 0; i < NumElem; ++i) {
+ LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
+ SDValue(NewLoad.getNode(), 1));
+ }
+
+ // Replace all the stores with the new store. Recursively remove the
+ // corresponding value if it is no longer used.
+ for (unsigned i = 0; i < NumElem; ++i) {
+ SDValue Val = StoreNodes[i].MemNode->getOperand(1);
+ CombineTo(StoreNodes[i].MemNode, NewStore);
+ if (Val.getNode()->use_empty())
+ recursivelyDeleteUnusedNodes(Val.getNode());
+ }
+
+ RV = true;
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
+ NumConsecutiveStores -= NumElem;
+ }
}
return RV;
}
@@ -13728,13 +14507,14 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// Try to infer better alignment information than the store already has.
if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
- if (Align > ST->getAlignment()) {
+ if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
SDValue NewStore =
DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
ST->getMemoryVT(), Align,
ST->getMemOperand()->getFlags(), ST->getAAInfo());
- if (NewStore.getNode() != N)
- return CombineTo(ST, NewStore, true);
+ // NewStore will always be N as we are only refining the alignment
+ assert(NewStore.getNode() == N);
+ (void)NewStore;
}
}
}
@@ -14216,6 +14996,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
SDValue EltNo = N->getOperand(1);
ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
+ // extract_vector_elt of out-of-bounds element -> UNDEF
+ if (ConstEltNo && ConstEltNo->getAPIntValue().uge(VT.getVectorNumElements()))
+ return DAG.getUNDEF(NVT);
+
// extract_vector_elt (build_vector x, y), 1 -> y
if (ConstEltNo &&
InVec.getOpcode() == ISD::BUILD_VECTOR &&
@@ -14301,6 +15085,23 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
}
}
+ // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
+ // simplify it based on the (valid) extraction indices.
+ if (llvm::all_of(InVec->uses(), [&](SDNode *Use) {
+ return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Use->getOperand(0) == InVec &&
+ isa<ConstantSDNode>(Use->getOperand(1));
+ })) {
+ APInt DemandedElts = APInt::getNullValue(VT.getVectorNumElements());
+ for (SDNode *Use : InVec->uses()) {
+ auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
+ if (CstElt->getAPIntValue().ult(VT.getVectorNumElements()))
+ DemandedElts.setBit(CstElt->getZExtValue());
+ }
+ if (SimplifyDemandedVectorElts(InVec, DemandedElts, true))
+ return SDValue(N, 0);
+ }
+
bool BCNumEltsChanged = false;
EVT ExtVT = VT.getVectorElementType();
EVT LVT = ExtVT;
@@ -14507,7 +15308,10 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
"Invalid vector size");
// Check if the new vector type is legal.
- if (!isTypeLegal(VecVT)) return SDValue();
+ if (!isTypeLegal(VecVT) ||
+ (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
+ TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
+ return SDValue();
// Make the new BUILD_VECTOR.
SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
@@ -14754,12 +15558,16 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
}
// Not an undef or zero. If the input is something other than an
- // EXTRACT_VECTOR_ELT with a constant index, bail out.
+ // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!isa<ConstantSDNode>(Op.getOperand(1)))
return SDValue();
SDValue ExtractedFromVec = Op.getOperand(0);
+ APInt ExtractIdx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
+ if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
+ return SDValue();
+
// All inputs must have the same element type as the output.
if (VT.getVectorElementType() !=
ExtractedFromVec.getValueType().getVectorElementType())
@@ -14915,6 +15723,54 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
return Shuffles[0];
}
+// Try to turn a build vector of zero extends of extract vector elts into a
+// vector zero extend and possibly an extract subvector.
+// TODO: Support sign extend or any extend?
+// TODO: Allow undef elements?
+// TODO: Don't require the extracts to start at element 0.
+SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
+ if (LegalOperations)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+
+ SDValue Op0 = N->getOperand(0);
+ auto checkElem = [&](SDValue Op) -> int64_t {
+ if (Op.getOpcode() == ISD::ZERO_EXTEND &&
+ Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
+ if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
+ return C->getZExtValue();
+ return -1;
+ };
+
+ // Make sure the first element matches
+ // (zext (extract_vector_elt X, C))
+ int64_t Offset = checkElem(Op0);
+ if (Offset < 0)
+ return SDValue();
+
+ unsigned NumElems = N->getNumOperands();
+ SDValue In = Op0.getOperand(0).getOperand(0);
+ EVT InSVT = In.getValueType().getScalarType();
+ EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
+
+ // Don't create an illegal input type after type legalization.
+ if (LegalTypes && !TLI.isTypeLegal(InVT))
+ return SDValue();
+
+ // Ensure all the elements come from the same vector and are adjacent.
+ for (unsigned i = 1; i != NumElems; ++i) {
+ if ((Offset + i) != checkElem(N->getOperand(i)))
+ return SDValue();
+ }
+
+ SDLoc DL(N);
+ In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
+ Op0.getOperand(0).getOperand(1));
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, In);
+}
+
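For reference, the new combine recognizes patterns of the form (build_vector (zext (extract_vector_elt X, C)), (zext (extract_vector_elt X, C+1)), ...) and rewrites them as (zext (extract_subvector X, C)). The adjacency check at its core can be sketched on its own; ZextExtract and matchConsecutiveZextExtracts below are hypothetical simplifications, not DAG types:

#include <cstddef>
#include <cstdint>
#include <vector>

// One build_vector operand after pattern matching: either
// zext(extract_vector_elt(Src, Index)) or something else (Index < 0).
struct ZextExtract {
  const void *Src;    // identity of the source vector
  std::int64_t Index; // extracted lane, or -1 if the pattern did not match
};

// Mirrors the adjacency check: every operand must extract consecutive lanes
// from the same source, starting at some offset. Returns that offset, or -1
// if the build_vector does not qualify for the combine.
static std::int64_t
matchConsecutiveZextExtracts(const std::vector<ZextExtract> &Ops) {
  if (Ops.empty() || Ops[0].Index < 0)
    return -1;
  const void *Src = Ops[0].Src;
  std::int64_t Offset = Ops[0].Index;
  for (std::size_t i = 1; i < Ops.size(); ++i)
    if (Ops[i].Src != Src ||
        Ops[i].Index != Offset + static_cast<std::int64_t>(i))
      return -1;
  return Offset; // caller emits (zext (extract_subvector Src, Offset))
}

int main() {
  int Vec = 0; // stands in for the source vector node
  std::vector<ZextExtract> Ops = {{&Vec, 2}, {&Vec, 3}, {&Vec, 4}, {&Vec, 5}};
  return matchConsecutiveZextExtracts(Ops) == 2 ? 0 : 1;
}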
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
EVT VT = N->getValueType(0);
@@ -14922,6 +15778,32 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (ISD::allOperandsUndef(N))
return DAG.getUNDEF(VT);
+ // If this is a splat of a bitcast from another vector, change to a
+ // concat_vector.
+ // For example:
+ // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
+ // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
+ //
+ // If X is a build_vector itself, the concat can become a larger build_vector.
+ // TODO: Maybe this is useful for non-splat too?
+ if (!LegalOperations) {
+ if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
+ Splat = peekThroughBitcast(Splat);
+ EVT SrcVT = Splat.getValueType();
+ if (SrcVT.isVector()) {
+ unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
+ SrcVT.getVectorElementType(), NumElts);
+ if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
+ SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
+ SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
+ NewVT, Ops);
+ return DAG.getBitcast(VT, Concat);
+ }
+ }
+ }
+ }
+
// Check if we can express BUILD VECTOR via subvector extract.
if (!LegalTypes && (N->getNumOperands() > 1)) {
SDValue Op0 = N->getOperand(0);
@@ -14951,6 +15833,9 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
Op0.getOperand(0), Op0.getOperand(1));
}
+ if (SDValue V = convertBuildVecZextToZext(N))
+ return V;
+
if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
return V;
@@ -15140,6 +16025,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
return SDValue();
+ // Bail out if the vector size is not a multiple of the scalar size.
+ if (VT.getSizeInBits() % SclTy.getSizeInBits())
+ return SDValue();
+
unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
if (VNTNumElms < 2)
return SDValue();
@@ -15418,13 +16307,22 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
// Only do this if we won't split any elements.
if (ExtractSize % EltSize == 0) {
unsigned NumElems = ExtractSize / EltSize;
- EVT ExtractVT = EVT::getVectorVT(*DAG.getContext(),
- InVT.getVectorElementType(), NumElems);
- if ((!LegalOperations ||
- TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT)) &&
+ EVT EltVT = InVT.getVectorElementType();
+ EVT ExtractVT = NumElems == 1 ? EltVT :
+ EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
+ if ((Level < AfterLegalizeDAG ||
+ (NumElems == 1 ||
+ TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
(!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) /
EltSize;
+ if (NumElems == 1) {
+ SDValue Src = V->getOperand(IdxVal);
+ if (EltVT != Src.getValueType())
+ Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
+
+ return DAG.getBitcast(NVT, Src);
+ }
// Extract the pieces from the original build_vector.
SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
@@ -15466,122 +16364,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
return NarrowBOp;
- return SDValue();
-}
-
-static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements,
- SDValue V, SelectionDAG &DAG) {
- SDLoc DL(V);
- EVT VT = V.getValueType();
-
- switch (V.getOpcode()) {
- default:
- return V;
-
- case ISD::CONCAT_VECTORS: {
- EVT OpVT = V->getOperand(0).getValueType();
- int OpSize = OpVT.getVectorNumElements();
- SmallBitVector OpUsedElements(OpSize, false);
- bool FoundSimplification = false;
- SmallVector<SDValue, 4> NewOps;
- NewOps.reserve(V->getNumOperands());
- for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) {
- SDValue Op = V->getOperand(i);
- bool OpUsed = false;
- for (int j = 0; j < OpSize; ++j)
- if (UsedElements[i * OpSize + j]) {
- OpUsedElements[j] = true;
- OpUsed = true;
- }
- NewOps.push_back(
- OpUsed ? simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG)
- : DAG.getUNDEF(OpVT));
- FoundSimplification |= Op == NewOps.back();
- OpUsedElements.reset();
- }
- if (FoundSimplification)
- V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps);
- return V;
- }
-
- case ISD::INSERT_SUBVECTOR: {
- SDValue BaseV = V->getOperand(0);
- SDValue SubV = V->getOperand(1);
- auto *IdxN = dyn_cast<ConstantSDNode>(V->getOperand(2));
- if (!IdxN)
- return V;
-
- int SubSize = SubV.getValueType().getVectorNumElements();
- int Idx = IdxN->getZExtValue();
- bool SubVectorUsed = false;
- SmallBitVector SubUsedElements(SubSize, false);
- for (int i = 0; i < SubSize; ++i)
- if (UsedElements[i + Idx]) {
- SubVectorUsed = true;
- SubUsedElements[i] = true;
- UsedElements[i + Idx] = false;
- }
-
- // Now recurse on both the base and sub vectors.
- SDValue SimplifiedSubV =
- SubVectorUsed
- ? simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG)
- : DAG.getUNDEF(SubV.getValueType());
- SDValue SimplifiedBaseV = simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG);
- if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV)
- V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
- SimplifiedBaseV, SimplifiedSubV, V->getOperand(2));
- return V;
- }
- }
-}
-
-static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0,
- SDValue N1, SelectionDAG &DAG) {
- EVT VT = SVN->getValueType(0);
- int NumElts = VT.getVectorNumElements();
- SmallBitVector N0UsedElements(NumElts, false), N1UsedElements(NumElts, false);
- for (int M : SVN->getMask())
- if (M >= 0 && M < NumElts)
- N0UsedElements[M] = true;
- else if (M >= NumElts)
- N1UsedElements[M - NumElts] = true;
-
- SDValue S0 = simplifyShuffleOperandRecursively(N0UsedElements, N0, DAG);
- SDValue S1 = simplifyShuffleOperandRecursively(N1UsedElements, N1, DAG);
- if (S0 == N0 && S1 == N1)
- return SDValue();
-
- return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask());
-}
-
-static SDValue simplifyShuffleMask(ShuffleVectorSDNode *SVN, SDValue N0,
- SDValue N1, SelectionDAG &DAG) {
- auto isUndefElt = [](SDValue V, int Idx) {
- // TODO - handle more cases as required.
- if (V.getOpcode() == ISD::BUILD_VECTOR)
- return V.getOperand(Idx).isUndef();
- if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
- return (Idx != 0) || V.getOperand(0).isUndef();
- return false;
- };
-
- EVT VT = SVN->getValueType(0);
- unsigned NumElts = VT.getVectorNumElements();
-
- bool Changed = false;
- SmallVector<int, 8> NewMask;
- for (unsigned i = 0; i != NumElts; ++i) {
- int Idx = SVN->getMaskElt(i);
- if ((0 <= Idx && Idx < (int)NumElts && isUndefElt(N0, Idx)) ||
- ((int)NumElts < Idx && isUndefElt(N1, Idx - NumElts))) {
- Changed = true;
- Idx = -1;
- }
- NewMask.push_back(Idx);
- }
- if (Changed)
- return DAG.getVectorShuffle(VT, SDLoc(SVN), N0, N1, NewMask);
+ if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+ return SDValue(N, 0);
return SDValue();
}
@@ -16028,10 +16812,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
}
- // Simplify shuffle mask if a referenced element is UNDEF.
- if (SDValue V = simplifyShuffleMask(SVN, N0, N1, DAG))
- return V;
-
if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
return InsElt;
@@ -16092,11 +16872,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
}
- // There are various patterns used to build up a vector from smaller vectors,
- // subvectors, or elements. Scan chains of these and replace unused insertions
- // or components with undef.
- if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG))
- return S;
+ // Simplify source operands based on shuffle mask.
+ if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+ return SDValue(N, 0);
// Match shuffles that can be converted to any_vector_extend_in_reg.
if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations, LegalTypes))
@@ -16422,10 +17200,11 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
SDValue CN0 = N0.getOperand(0);
SDValue CN1 = N1.getOperand(0);
- if (CN0.getValueType().getVectorElementType() ==
- CN1.getValueType().getVectorElementType() &&
- CN0.getValueType().getVectorNumElements() ==
- VT.getVectorNumElements()) {
+ EVT CN0VT = CN0.getValueType();
+ EVT CN1VT = CN1.getValueType();
+ if (CN0VT.isVector() && CN1VT.isVector() &&
+ CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
+ CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
CN0.getValueType(), CN0, CN1, N2);
return DAG.getBitcast(VT, NewINSERT);
@@ -16680,14 +17459,14 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
const ConstantFPSDNode *Zero = nullptr;
if (TheSelect->getOpcode() == ISD::SELECT_CC) {
- CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
+ CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
CmpLHS = TheSelect->getOperand(0);
Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
} else {
// SELECT or VSELECT
SDValue Cmp = TheSelect->getOperand(0);
if (Cmp.getOpcode() == ISD::SETCC) {
- CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
+ CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
CmpLHS = Cmp.getOperand(0);
Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
}
@@ -16905,24 +17684,6 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
return !SCCC->isNullValue() ? N2 : N3;
}
- // Check to see if we can simplify the select into an fabs node
- if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
- // Allow either -0.0 or 0.0
- if (CFP->isZero()) {
- // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
- if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
- N0 == N2 && N3.getOpcode() == ISD::FNEG &&
- N2 == N3.getOperand(0))
- return DAG.getNode(ISD::FABS, DL, VT, N0);
-
- // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
- if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
- N0 == N3 && N2.getOpcode() == ISD::FNEG &&
- N2.getOperand(0) == N3)
- return DAG.getNode(ISD::FABS, DL, VT, N3);
- }
- }
-
// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
// in it. This is a win when the constant is not otherwise available because
@@ -17400,19 +18161,34 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
: buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
if (!Reciprocal) {
- // Unfortunately, Est is now NaN if the input was exactly 0.0.
- // Select out this case and force the answer to 0.0.
+ // The estimate is now completely wrong if the input was exactly 0.0 or
+ // possibly a denormal. Force the answer to 0.0 for those cases.
EVT VT = Op.getValueType();
SDLoc DL(Op);
-
- SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
EVT CCVT = getSetCCResultType(VT);
- SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
- AddToWorklist(ZeroCmp.getNode());
-
- Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
- ZeroCmp, FPZero, Est);
- AddToWorklist(Est.getNode());
+ ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
+ const Function &F = DAG.getMachineFunction().getFunction();
+ Attribute Denorms = F.getFnAttribute("denormal-fp-math");
+ if (Denorms.getValueAsString().equals("ieee")) {
+ // fabs(X) < SmallestNormal ? 0.0 : Est
+ const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
+ APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
+ SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
+ SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
+ SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
+ SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
+ Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
+ AddToWorklist(Fabs.getNode());
+ AddToWorklist(IsDenorm.getNode());
+ AddToWorklist(Est.getNode());
+ } else {
+ // X == 0.0 ? 0.0 : Est
+ SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
+ SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
+ Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
+ AddToWorklist(IsZero.getNode());
+ AddToWorklist(Est.getNode());
+ }
}
}
return Est;
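For reference, the non-reciprocal path above produces sqrt(X) as X * rsqrt-estimate(X), which is NaN when X is exactly 0.0 and unreliable for denormals. A minimal scalar sketch of the "ieee" fixup, assuming IEEE single precision and a hypothetical already-refined estimate (illustrative only, not the DAG code itself):

#include <cfloat>
#include <cmath>

// Small inputs are forced to 0.0 before the estimate is used, mirroring the
// fabs(X) < SmallestNormal select built above.
static float sqrtFromRsqrtEstimate(float X, float RefinedRsqrt) {
  float Est = X * RefinedRsqrt;
  if (std::fabs(X) < FLT_MIN) // fabs(X) < SmallestNormal
    Est = 0.0f;               // force the answer to 0.0 for zero/denormal inputs
  return Est;
}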
@@ -17715,7 +18491,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
Index = nullptr;
break;
}
- } // end while
+ } // end while
}
// At this point, ChainedStores lists all of the Store nodes
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 3c856914053b..e4a9d557d386 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -61,7 +61,6 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -99,6 +98,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -113,6 +113,11 @@ using namespace llvm;
#define DEBUG_TYPE "isel"
+// FIXME: Remove this after the feature has proven reliable.
+static cl::opt<bool> SinkLocalValues("fast-isel-sink-local-values",
+ cl::init(true), cl::Hidden,
+ cl::desc("Sink local values in FastISel"));
+
STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by "
"target-independent selector");
STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by "
@@ -120,9 +125,10 @@ STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by "
STATISTIC(NumFastIselDead, "Number of dead insts removed on failure");
/// Set the current block to which generated machine instructions will be
-/// appended, and clear the local CSE map.
+/// appended.
void FastISel::startNewBlock() {
- LocalValueMap.clear();
+ assert(LocalValueMap.empty() &&
+ "local values should be cleared after finishing a BB");
// Instructions are appended to FuncInfo.MBB. If the basic block already
// contains labels or copies, use the last instruction as the last local
@@ -133,6 +139,9 @@ void FastISel::startNewBlock() {
LastLocalValue = EmitStartPt;
}
+/// Flush the local CSE map and sink anything we can.
+void FastISel::finishBasicBlock() { flushLocalValueMap(); }
+
bool FastISel::lowerArguments() {
if (!FuncInfo.CanLowerReturn)
// Fallback to SDISel argument lowering code to deal with sret pointer
@@ -153,11 +162,168 @@ bool FastISel::lowerArguments() {
return true;
}
+/// Return the defined register if this instruction defines exactly one
+/// virtual register and uses no other virtual registers. Otherwise return 0.
+static unsigned findSinkableLocalRegDef(MachineInstr &MI) {
+ unsigned RegDef = 0;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ if (MO.isDef()) {
+ if (RegDef)
+ return 0;
+ RegDef = MO.getReg();
+ } else if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ // This is another use of a vreg. Don't try to sink it.
+ return 0;
+ }
+ }
+ return RegDef;
+}
+
void FastISel::flushLocalValueMap() {
+ // Try to sink local values down to their first use so that we can give them a
+ // better debug location. This has the side effect of shrinking local value
+ // live ranges, which helps out fast regalloc.
+ if (SinkLocalValues && LastLocalValue != EmitStartPt) {
+ // Sink local value materialization instructions between EmitStartPt and
+ // LastLocalValue. Visit them bottom-up, starting from LastLocalValue, to
+ // avoid inserting into the range that we're iterating over.
+ MachineBasicBlock::reverse_iterator RE =
+ EmitStartPt ? MachineBasicBlock::reverse_iterator(EmitStartPt)
+ : FuncInfo.MBB->rend();
+ MachineBasicBlock::reverse_iterator RI(LastLocalValue);
+
+ InstOrderMap OrderMap;
+ for (; RI != RE;) {
+ MachineInstr &LocalMI = *RI;
+ ++RI;
+ bool Store = true;
+ if (!LocalMI.isSafeToMove(nullptr, Store))
+ continue;
+ unsigned DefReg = findSinkableLocalRegDef(LocalMI);
+ if (DefReg == 0)
+ continue;
+
+ sinkLocalValueMaterialization(LocalMI, DefReg, OrderMap);
+ }
+ }
+
LocalValueMap.clear();
LastLocalValue = EmitStartPt;
recomputeInsertPt();
SavedInsertPt = FuncInfo.InsertPt;
+ LastFlushPoint = FuncInfo.InsertPt;
+}
+
+static bool isRegUsedByPhiNodes(unsigned DefReg,
+ FunctionLoweringInfo &FuncInfo) {
+ for (auto &P : FuncInfo.PHINodesToUpdate)
+ if (P.second == DefReg)
+ return true;
+ return false;
+}
+
+/// Build a map of instruction orders. Return the first terminator and its
+/// order. Consider EH_LABEL instructions to be terminators as well, since local
+/// values for phis after invokes must be materialized before the call.
+void FastISel::InstOrderMap::initialize(
+ MachineBasicBlock *MBB, MachineBasicBlock::iterator LastFlushPoint) {
+ unsigned Order = 0;
+ for (MachineInstr &I : *MBB) {
+ if (!FirstTerminator &&
+ (I.isTerminator() || (I.isEHLabel() && &I != &MBB->front()))) {
+ FirstTerminator = &I;
+ FirstTerminatorOrder = Order;
+ }
+ Orders[&I] = Order++;
+
+ // We don't need to order instructions past the last flush point.
+ if (I.getIterator() == LastFlushPoint)
+ break;
+ }
+}
+
+void FastISel::sinkLocalValueMaterialization(MachineInstr &LocalMI,
+ unsigned DefReg,
+ InstOrderMap &OrderMap) {
+ // If this register is used by a register fixup, MRI will not contain all
+ // the uses until after register fixups, so don't attempt to sink or DCE
+ // this instruction. Register fixups typically come from no-op cast
+ // instructions, which replace the cast instruction vreg with the local
+ // value vreg.
+ if (FuncInfo.RegsWithFixups.count(DefReg))
+ return;
+
+ // We can DCE this instruction if there are no uses and it wasn't
+ // materialized for a successor PHI node.
+ bool UsedByPHI = isRegUsedByPhiNodes(DefReg, FuncInfo);
+ if (!UsedByPHI && MRI.use_nodbg_empty(DefReg)) {
+ if (EmitStartPt == &LocalMI)
+ EmitStartPt = EmitStartPt->getPrevNode();
+ LLVM_DEBUG(dbgs() << "removing dead local value materialization "
+ << LocalMI);
+ OrderMap.Orders.erase(&LocalMI);
+ LocalMI.eraseFromParent();
+ return;
+ }
+
+ // Number the instructions if we haven't yet so we can efficiently find the
+ // earliest use.
+ if (OrderMap.Orders.empty())
+ OrderMap.initialize(FuncInfo.MBB, LastFlushPoint);
+
+ // Find the first user in the BB.
+ MachineInstr *FirstUser = nullptr;
+ unsigned FirstOrder = std::numeric_limits<unsigned>::max();
+ for (MachineInstr &UseInst : MRI.use_nodbg_instructions(DefReg)) {
+ auto I = OrderMap.Orders.find(&UseInst);
+ assert(I != OrderMap.Orders.end() &&
+ "local value used by instruction outside local region");
+ unsigned UseOrder = I->second;
+ if (UseOrder < FirstOrder) {
+ FirstOrder = UseOrder;
+ FirstUser = &UseInst;
+ }
+ }
+
+ // The insertion point will be the first terminator or the first user,
+ // whichever came first. If there was no terminator, this must be a
+ // fallthrough block and the insertion point is the end of the block.
+ MachineBasicBlock::instr_iterator SinkPos;
+ if (UsedByPHI && OrderMap.FirstTerminatorOrder < FirstOrder) {
+ FirstOrder = OrderMap.FirstTerminatorOrder;
+ SinkPos = OrderMap.FirstTerminator->getIterator();
+ } else if (FirstUser) {
+ SinkPos = FirstUser->getIterator();
+ } else {
+ assert(UsedByPHI && "must be users if not used by a phi");
+ SinkPos = FuncInfo.MBB->instr_end();
+ }
+
+ // Collect all DBG_VALUEs before the new insertion position so that we can
+ // sink them.
+ SmallVector<MachineInstr *, 1> DbgValues;
+ for (MachineInstr &DbgVal : MRI.use_instructions(DefReg)) {
+ if (!DbgVal.isDebugValue())
+ continue;
+ unsigned UseOrder = OrderMap.Orders[&DbgVal];
+ if (UseOrder < FirstOrder)
+ DbgValues.push_back(&DbgVal);
+ }
+
+ // Sink LocalMI before SinkPos and assign it the same DebugLoc.
+ LLVM_DEBUG(dbgs() << "sinking local value to first use " << LocalMI);
+ FuncInfo.MBB->remove(&LocalMI);
+ FuncInfo.MBB->insert(SinkPos, &LocalMI);
+ if (SinkPos != FuncInfo.MBB->end())
+ LocalMI.setDebugLoc(SinkPos->getDebugLoc());
+
+ // Sink any debug values that we've collected.
+ for (MachineInstr *DI : DbgValues) {
+ FuncInfo.MBB->remove(DI);
+ FuncInfo.MBB->insert(SinkPos, DI);
+ }
}
bool FastISel::hasTrivialKill(const Value *V) {
@@ -328,8 +494,10 @@ void FastISel::updateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) {
AssignedReg = Reg;
else if (Reg != AssignedReg) {
// Arrange for uses of AssignedReg to be replaced by uses of Reg.
- for (unsigned i = 0; i < NumRegs; i++)
+ for (unsigned i = 0; i < NumRegs; i++) {
FuncInfo.RegFixups[AssignedReg + i] = Reg + i;
+ FuncInfo.RegsWithFixups.insert(Reg + i);
+ }
AssignedReg = Reg;
}
@@ -681,7 +849,7 @@ bool FastISel::selectStackmap(const CallInst *I) {
return true;
}
-/// \brief Lower an argument list according to the target calling convention.
+/// Lower an argument list according to the target calling convention.
///
/// This is a helper for lowering intrinsics that follow a target calling
/// convention or require stack pointer adjustment. Only a subset of the
@@ -702,7 +870,7 @@ bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx,
ArgListEntry Entry;
Entry.Val = V;
Entry.Ty = V->getType();
- Entry.setAttributes(&CS, ArgIdx);
+ Entry.setAttributes(&CS, ArgI);
Args.push_back(Entry);
}
@@ -874,10 +1042,31 @@ bool FastISel::selectXRayCustomEvent(const CallInst *I) {
TII.get(TargetOpcode::PATCHABLE_EVENT_CALL));
for (auto &MO : Ops)
MIB.add(MO);
+
// Insert the Patchable Event Call instruction, that gets lowered properly.
return true;
}
+bool FastISel::selectXRayTypedEvent(const CallInst *I) {
+ const auto &Triple = TM.getTargetTriple();
+ if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
+ return true; // don't do anything to this instruction.
+ SmallVector<MachineOperand, 8> Ops;
+ Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)),
+ /*IsDef=*/false));
+ Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)),
+ /*IsDef=*/false));
+ Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(2)),
+ /*IsDef=*/false));
+ MachineInstrBuilder MIB =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::PATCHABLE_TYPED_EVENT_CALL));
+ for (auto &MO : Ops)
+ MIB.add(MO);
+
+ // Insert the Patchable Typed Event Call instruction, which gets lowered properly.
+ return true;
+}
/// Returns an AttributeList representing the attributes applied to the return
/// value of the given call.
@@ -1141,13 +1330,13 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
assert(DI->getVariable() && "Missing variable");
if (!FuncInfo.MF->getMMI().hasDebugInfo()) {
- DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
return true;
}
const Value *Address = DI->getAddress();
if (!Address || isa<UndefValue>(Address)) {
- DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
return true;
}
@@ -1182,24 +1371,15 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
if (Op) {
assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
"Expected inlined-at fields to agree");
- if (Op->isReg()) {
- Op->setIsDebug(true);
- // A dbg.declare describes the address of a source variable, so lower it
- // into an indirect DBG_VALUE.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true,
- Op->getReg(), DI->getVariable(), DI->getExpression());
- } else
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::DBG_VALUE))
- .add(*Op)
- .addImm(0)
- .addMetadata(DI->getVariable())
- .addMetadata(DI->getExpression());
+ // A dbg.declare describes the address of a source variable, so lower it
+ // into an indirect DBG_VALUE.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true,
+ *Op, DI->getVariable(), DI->getExpression());
} else {
// We can't yet handle anything else here because it would require
// generating code, thus altering codegen because of debug info.
- DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
}
return true;
}
@@ -1242,7 +1422,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
} else {
// We can't yet handle anything else here because it would require
// generating code, thus altering codegen because of debug info.
- DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
}
return true;
}
@@ -1256,7 +1436,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
updateValueMap(II, ResultReg);
return true;
}
- case Intrinsic::invariant_group_barrier:
+ case Intrinsic::launder_invariant_group:
+ case Intrinsic::strip_invariant_group:
case Intrinsic::expect: {
unsigned ResultReg = getRegForValue(II->getArgOperand(0));
if (!ResultReg)
@@ -1272,6 +1453,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
case Intrinsic::xray_customevent:
return selectXRayCustomEvent(II);
+ case Intrinsic::xray_typedevent:
+ return selectXRayTypedEvent(II);
}
return fastLowerIntrinsicCall(II);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 81347fa4bd46..42c7181dac41 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/WasmEHFuncInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -118,6 +119,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
}
}
}
+ if (Personality == EHPersonality::Wasm_CXX) {
+ WasmEHFuncInfo &EHInfo = *MF->getWasmEHFuncInfo();
+ calculateWasmEHInfo(&fn, EHInfo);
+ }
// Initialize the mapping of values to registers. This is only set up for
// instruction values that are used outside of the block that defines
@@ -226,9 +231,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
const Instruction *PadInst = BB.getFirstNonPHI();
// If this is a non-landingpad EH pad, mark this function as using
// funclets.
- // FIXME: SEH catchpads do not create funclets, so we could avoid setting
- // this in such cases in order to improve frame layout.
+ // FIXME: SEH catchpads do not create EH scope/funclets, so we could avoid
+ // setting this in such cases in order to improve frame layout.
if (!isa<LandingPadInst>(PadInst)) {
+ MF->setHasEHScopes(true);
MF->setHasEHFunclets(true);
MF->getFrameInfo().setHasOpaqueSPAdjustment(true);
}
@@ -281,28 +287,46 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
}
}
- if (!isFuncletEHPersonality(Personality))
- return;
-
- WinEHFuncInfo &EHInfo = *MF->getWinEHFuncInfo();
+ if (isFuncletEHPersonality(Personality)) {
+ WinEHFuncInfo &EHInfo = *MF->getWinEHFuncInfo();
- // Map all BB references in the WinEH data to MBBs.
- for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
- for (WinEHHandlerType &H : TBME.HandlerArray) {
- if (H.Handler)
- H.Handler = MBBMap[H.Handler.get<const BasicBlock *>()];
+ // Map all BB references in the WinEH data to MBBs.
+ for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
+ for (WinEHHandlerType &H : TBME.HandlerArray) {
+ if (H.Handler)
+ H.Handler = MBBMap[H.Handler.get<const BasicBlock *>()];
+ }
+ }
+ for (CxxUnwindMapEntry &UME : EHInfo.CxxUnwindMap)
+ if (UME.Cleanup)
+ UME.Cleanup = MBBMap[UME.Cleanup.get<const BasicBlock *>()];
+ for (SEHUnwindMapEntry &UME : EHInfo.SEHUnwindMap) {
+ const auto *BB = UME.Handler.get<const BasicBlock *>();
+ UME.Handler = MBBMap[BB];
+ }
+ for (ClrEHUnwindMapEntry &CME : EHInfo.ClrEHUnwindMap) {
+ const auto *BB = CME.Handler.get<const BasicBlock *>();
+ CME.Handler = MBBMap[BB];
}
}
- for (CxxUnwindMapEntry &UME : EHInfo.CxxUnwindMap)
- if (UME.Cleanup)
- UME.Cleanup = MBBMap[UME.Cleanup.get<const BasicBlock *>()];
- for (SEHUnwindMapEntry &UME : EHInfo.SEHUnwindMap) {
- const BasicBlock *BB = UME.Handler.get<const BasicBlock *>();
- UME.Handler = MBBMap[BB];
- }
- for (ClrEHUnwindMapEntry &CME : EHInfo.ClrEHUnwindMap) {
- const BasicBlock *BB = CME.Handler.get<const BasicBlock *>();
- CME.Handler = MBBMap[BB];
+
+ else if (Personality == EHPersonality::Wasm_CXX) {
+ WasmEHFuncInfo &EHInfo = *MF->getWasmEHFuncInfo();
+ // Map all BB references in the Wasm EH data to MBBs.
+ DenseMap<BBOrMBB, BBOrMBB> NewMap;
+ for (auto &KV : EHInfo.EHPadUnwindMap) {
+ const auto *Src = KV.first.get<const BasicBlock *>();
+ const auto *Dst = KV.second.get<const BasicBlock *>();
+ NewMap[MBBMap[Src]] = MBBMap[Dst];
+ }
+ EHInfo.EHPadUnwindMap = std::move(NewMap);
+ NewMap.clear();
+ for (auto &KV : EHInfo.ThrowUnwindMap) {
+ const auto *Src = KV.first.get<const BasicBlock *>();
+ const auto *Dst = KV.second.get<const BasicBlock *>();
+ NewMap[MBBMap[Src]] = MBBMap[Dst];
+ }
+ EHInfo.ThrowUnwindMap = std::move(NewMap);
}
}
@@ -312,12 +336,14 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
void FunctionLoweringInfo::clear() {
MBBMap.clear();
ValueMap.clear();
+ VirtReg2Value.clear();
StaticAllocaMap.clear();
LiveOutRegInfo.clear();
VisitedBBs.clear();
ArgDbgValues.clear();
ByValArgFrameIndexMap.clear();
RegFixups.clear();
+ RegsWithFixups.clear();
StatepointStackSlots.clear();
StatepointSpillMaps.clear();
PreferredExtendType.clear();
@@ -483,7 +509,7 @@ int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) {
auto I = ByValArgFrameIndexMap.find(A);
if (I != ByValArgFrameIndexMap.end())
return I->second;
- DEBUG(dbgs() << "Argument does not have assigned frame index!\n");
+ LLVM_DEBUG(dbgs() << "Argument does not have assigned frame index!\n");
return INT_MAX;
}
@@ -547,3 +573,13 @@ FunctionLoweringInfo::getOrCreateSwiftErrorVRegUseAt(const Instruction *I, const
}
return std::make_pair(It->second, false);
}
+
+const Value *
+FunctionLoweringInfo::getValueFromVirtualReg(unsigned Vreg) {
+ if (VirtReg2Value.empty()) {
+ for (auto &P : ValueMap) {
+ VirtReg2Value[P.second] = P.first;
+ }
+ }
+ return VirtReg2Value[Vreg];
+}
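A simplified sketch of the lazy inverse map behind getValueFromVirtualReg, with hypothetical key and value types: the reverse mapping is built once, on the first query, from the existing forward map.

#include <unordered_map>

struct RegValueMap {
  std::unordered_map<const void *, unsigned> ValueToReg; // forward map
  std::unordered_map<unsigned, const void *> RegToValue; // built lazily

  const void *getValueFromReg(unsigned Reg) {
    if (RegToValue.empty())
      for (const auto &P : ValueToReg)
        RegToValue[P.second] = P.first;
    return RegToValue[Reg]; // unknown registers yield a null value
  }
};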
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index cc9b41b4b487..d6171f3177d7 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -394,11 +394,26 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
} else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) {
MIB.addFPImm(F->getConstantFPValue());
} else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
+ unsigned VReg = R->getReg();
+ MVT OpVT = Op.getSimpleValueType();
+ const TargetRegisterClass *OpRC =
+ TLI->isTypeLegal(OpVT) ? TLI->getRegClassFor(OpVT) : nullptr;
+ const TargetRegisterClass *IIRC =
+ II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum, TRI, *MF))
+ : nullptr;
+
+ if (OpRC && IIRC && OpRC != IIRC &&
+ TargetRegisterInfo::isVirtualRegister(VReg)) {
+ unsigned NewVReg = MRI->createVirtualRegister(IIRC);
+ BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg);
+ VReg = NewVReg;
+ }
// Turn additional physreg operands into implicit uses on non-variadic
// instructions. This is used by call and return instructions passing
// arguments in registers.
bool Imp = II && (IIOpNum >= II->getNumOperands() && !II->isVariadic());
- MIB.addReg(R->getReg(), getImplRegState(Imp));
+ MIB.addReg(VReg, getImplRegState(Imp));
} else if (RegisterMaskSDNode *RM = dyn_cast<RegisterMaskSDNode>(Op)) {
MIB.addRegMask(RM->getRegMask());
} else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) {
@@ -682,11 +697,15 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
if (SD->getKind() == SDDbgValue::FRAMEIX) {
// Stack address; this needs to be lowered in target-dependent fashion.
// EmitTargetCodeForFrameDebugValue is responsible for allocation.
- return BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE))
- .addFrameIndex(SD->getFrameIx())
- .addImm(0)
- .addMetadata(Var)
- .addMetadata(Expr);
+ auto FrameMI = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE))
+ .addFrameIndex(SD->getFrameIx());
+ if (SD->isIndirect())
+ // Push [fi + 0] onto the DIExpression stack.
+ FrameMI.addImm(0);
+ else
+ // Push fi onto the DIExpression stack.
+ FrameMI.addReg(0);
+ return FrameMI.addMetadata(Var).addMetadata(Expr);
}
// Otherwise, we're going to create an instruction here.
const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
@@ -705,6 +724,8 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
else
AddOperand(MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap,
/*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false);
+ } else if (SD->getKind() == SDDbgValue::VREG) {
+ MIB.addReg(SD->getVReg(), RegState::Debug);
} else if (SD->getKind() == SDDbgValue::CONST) {
const Value *V = SD->getConst();
if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
@@ -736,6 +757,20 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
return &*MIB;
}
+MachineInstr *
+InstrEmitter::EmitDbgLabel(SDDbgLabel *SD) {
+ MDNode *Label = SD->getLabel();
+ DebugLoc DL = SD->getDebugLoc();
+ assert(cast<DILabel>(Label)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+
+ const MCInstrDesc &II = TII->get(TargetOpcode::DBG_LABEL);
+ MachineInstrBuilder MIB = BuildMI(*MF, DL, II);
+ MIB.addMetadata(Label);
+
+ return &*MIB;
+}
+
/// EmitMachineNode - Generate machine code for a target-specific node and
/// needed dependencies.
///
@@ -807,9 +842,34 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// Add result register values for things that are defined by this
// instruction.
- if (NumResults)
+ if (NumResults) {
CreateVirtualRegisters(Node, MIB, II, IsClone, IsCloned, VRBaseMap);
+ // Transfer any IR flags from the SDNode to the MachineInstr
+ MachineInstr *MI = MIB.getInstr();
+ const SDNodeFlags Flags = Node->getFlags();
+ if (Flags.hasNoSignedZeros())
+ MI->setFlag(MachineInstr::MIFlag::FmNsz);
+
+ if (Flags.hasAllowReciprocal())
+ MI->setFlag(MachineInstr::MIFlag::FmArcp);
+
+ if (Flags.hasNoNaNs())
+ MI->setFlag(MachineInstr::MIFlag::FmNoNans);
+
+ if (Flags.hasNoInfs())
+ MI->setFlag(MachineInstr::MIFlag::FmNoInfs);
+
+ if (Flags.hasAllowContract())
+ MI->setFlag(MachineInstr::MIFlag::FmContract);
+
+ if (Flags.hasApproximateFuncs())
+ MI->setFlag(MachineInstr::MIFlag::FmAfn);
+
+ if (Flags.hasAllowReassociation())
+ MI->setFlag(MachineInstr::MIFlag::FmReassoc);
+ }
+
// Emit all of the actual operands of this instruction, adding them to the
// instruction as appropriate.
bool HasOptPRefs = NumDefs > NumResults;
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
index 8a8a1bbd18f7..701b6368690b 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -113,6 +113,9 @@ public:
MachineInstr *EmitDbgValue(SDDbgValue *SD,
DenseMap<SDValue, unsigned> &VRBaseMap);
+ /// Generate machine instruction for a dbg_label node.
+ MachineInstr *EmitDbgLabel(SDDbgLabel *SD);
+
/// EmitNode - Generate machine code for a node and needed dependencies.
///
void EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 3a2fb0c0a836..2b7ba1ffb309 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -22,7 +22,6 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -41,6 +40,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -87,11 +87,11 @@ class SelectionDAGLegalize {
const TargetLowering &TLI;
SelectionDAG &DAG;
- /// \brief The set of nodes which have already been legalized. We hold a
+ /// The set of nodes which have already been legalized. We hold a
/// reference to it in order to update as necessary on node deletion.
SmallPtrSetImpl<SDNode *> &LegalizedNodes;
- /// \brief A set of all the nodes updated during legalization.
+ /// A set of all the nodes updated during legalization.
SmallSetVector<SDNode *, 16> *UpdatedNodes;
EVT getSetCCResultType(EVT VT) const {
@@ -107,7 +107,7 @@ public:
: TM(DAG.getTarget()), TLI(DAG.getTargetLoweringInfo()), DAG(DAG),
LegalizedNodes(LegalizedNodes), UpdatedNodes(UpdatedNodes) {}
- /// \brief Legalizes the given operation.
+ /// Legalizes the given operation.
void LegalizeOp(SDNode *Node);
private:
@@ -167,7 +167,7 @@ private:
SDValue NewIntValue) const;
SDValue ExpandFCOPYSIGN(SDNode *Node) const;
SDValue ExpandFABS(SDNode *Node) const;
- SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT,
+ SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT,
const SDLoc &dl);
SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned,
const SDLoc &dl);
@@ -200,8 +200,8 @@ public:
}
void ReplaceNode(SDNode *Old, SDNode *New) {
- DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG);
- dbgs() << " with: "; New->dump(&DAG));
+ LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG);
+ dbgs() << " with: "; New->dump(&DAG));
assert(Old->getNumValues() == New->getNumValues() &&
"Replacing one node with another that produces a different number "
@@ -213,8 +213,8 @@ public:
}
void ReplaceNode(SDValue Old, SDValue New) {
- DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG);
- dbgs() << " with: "; New->dump(&DAG));
+ LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG);
+ dbgs() << " with: "; New->dump(&DAG));
DAG.ReplaceAllUsesWith(Old, New);
if (UpdatedNodes)
@@ -223,13 +223,12 @@ public:
}
void ReplaceNode(SDNode *Old, const SDValue *New) {
- DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG));
+ LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG));
DAG.ReplaceAllUsesWith(Old, New);
for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) {
- DEBUG(dbgs() << (i == 0 ? " with: "
- : " and: ");
- New[i]->dump(&DAG));
+ LLVM_DEBUG(dbgs() << (i == 0 ? " with: " : " and: ");
+ New[i]->dump(&DAG));
if (UpdatedNodes)
UpdatedNodes->insert(New[i].getNode());
}
@@ -408,7 +407,7 @@ SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
}
SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
- DEBUG(dbgs() << "Optimizing float store operations\n");
+ LLVM_DEBUG(dbgs() << "Optimizing float store operations\n");
// Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
// FIXME: We shouldn't do this for TargetConstantFP's.
// FIXME: move this to the DAG Combiner! Note that we can't regress due
@@ -477,7 +476,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
AAMDNodes AAInfo = ST->getAAInfo();
if (!ST->isTruncatingStore()) {
- DEBUG(dbgs() << "Legalizing store operation\n");
+ LLVM_DEBUG(dbgs() << "Legalizing store operation\n");
if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
ReplaceNode(ST, OptStore);
return;
@@ -495,15 +494,15 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
unsigned Align = ST->getAlignment();
const DataLayout &DL = DAG.getDataLayout();
if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) {
- DEBUG(dbgs() << "Expanding unsupported unaligned store\n");
+ LLVM_DEBUG(dbgs() << "Expanding unsupported unaligned store\n");
SDValue Result = TLI.expandUnalignedStore(ST, DAG);
ReplaceNode(SDValue(ST, 0), Result);
} else
- DEBUG(dbgs() << "Legal store\n");
+ LLVM_DEBUG(dbgs() << "Legal store\n");
break;
}
case TargetLowering::Custom: {
- DEBUG(dbgs() << "Trying custom lowering\n");
+ LLVM_DEBUG(dbgs() << "Trying custom lowering\n");
SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
if (Res && Res != SDValue(Node, 0))
ReplaceNode(SDValue(Node, 0), Res);
@@ -524,7 +523,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
return;
}
- DEBUG(dbgs() << "Legalizing truncating store operations\n");
+ LLVM_DEBUG(dbgs() << "Legalizing truncating store operations\n");
SDValue Value = ST->getValue();
EVT StVT = ST->getMemoryVT();
unsigned StWidth = StVT.getSizeInBits();
@@ -656,7 +655,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
ISD::LoadExtType ExtType = LD->getExtensionType();
if (ExtType == ISD::NON_EXTLOAD) {
- DEBUG(dbgs() << "Legalizing non-extending load operation\n");
+ LLVM_DEBUG(dbgs() << "Legalizing non-extending load operation\n");
MVT VT = Node->getSimpleValueType(0);
SDValue RVal = SDValue(Node, 0);
SDValue RChain = SDValue(Node, 1);
@@ -706,7 +705,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
return;
}
- DEBUG(dbgs() << "Legalizing extending load operation\n");
+ LLVM_DEBUG(dbgs() << "Legalizing extending load operation\n");
EVT SrcVT = LD->getMemoryVT();
unsigned SrcWidth = SrcVT.getSizeInBits();
unsigned Alignment = LD->getAlignment();
@@ -947,39 +946,9 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
}
}
-static TargetLowering::LegalizeAction
-getStrictFPOpcodeAction(const TargetLowering &TLI, unsigned Opcode, EVT VT) {
- unsigned EqOpc;
- switch (Opcode) {
- default: llvm_unreachable("Unexpected FP pseudo-opcode");
- case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break;
- case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break;
- case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break;
- case ISD::STRICT_FMA: EqOpc = ISD::FMA; break;
- case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break;
- case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break;
- case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break;
- case ISD::STRICT_FEXP2: EqOpc = ISD::FEXP2; break;
- case ISD::STRICT_FLOG: EqOpc = ISD::FLOG; break;
- case ISD::STRICT_FLOG10: EqOpc = ISD::FLOG10; break;
- case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break;
- case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break;
- case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break;
- }
-
- auto Action = TLI.getOperationAction(EqOpc, VT);
-
- // We don't currently handle Custom or Promote for strict FP pseudo-ops.
- // For now, we just expand for those cases.
- if (Action != TargetLowering::Legal)
- Action = TargetLowering::Expand;
-
- return Action;
-}
-
/// Return a legal replacement for the given operation, with all legal operands.
void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
- DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG));
// Allow illegal target nodes and illegal registers.
if (Node->getOpcode() == ISD::TargetConstant ||
@@ -1043,8 +1012,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::SETCC:
case ISD::BR_CC: {
unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 :
- Node->getOpcode() == ISD::SETCC ? 2 :
- Node->getOpcode() == ISD::SETCCE ? 3 : 1;
+ Node->getOpcode() == ISD::SETCC ? 2 : 1;
unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0;
MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType();
ISD::CondCode CCCode =
@@ -1122,6 +1090,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
return;
}
break;
+ case ISD::STRICT_FADD:
+ case ISD::STRICT_FSUB:
+ case ISD::STRICT_FMUL:
+ case ISD::STRICT_FDIV:
case ISD::STRICT_FSQRT:
case ISD::STRICT_FMA:
case ISD::STRICT_FPOW:
@@ -1139,8 +1111,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
// equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
// is also legal, but if ISD::FSQRT requires expansion then so does
// ISD::STRICT_FSQRT.
- Action = getStrictFPOpcodeAction(TLI, Node->getOpcode(),
- Node->getValueType(0));
+ Action = TLI.getStrictFPOperationAction(Node->getOpcode(),
+ Node->getValueType(0));
break;
default:
if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
@@ -1202,10 +1174,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
}
switch (Action) {
case TargetLowering::Legal:
- DEBUG(dbgs() << "Legal node: nothing to do\n");
+ LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
return;
case TargetLowering::Custom:
- DEBUG(dbgs() << "Trying custom legalization\n");
+ LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
// FIXME: The handling for custom lowering with multiple results is
// a complete mess.
if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) {
@@ -1213,7 +1185,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
return;
if (Node->getNumValues() == 1) {
- DEBUG(dbgs() << "Successfully custom legalized node\n");
+ LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n");
// We can just directly replace this node with the lowered value.
ReplaceNode(SDValue(Node, 0), Res);
return;
@@ -1222,11 +1194,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
SmallVector<SDValue, 8> ResultVals;
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
ResultVals.push_back(Res.getValue(i));
- DEBUG(dbgs() << "Successfully custom legalized node\n");
+ LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n");
ReplaceNode(Node, ResultVals.data());
return;
}
- DEBUG(dbgs() << "Could not custom legalize node\n");
+ LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
LLVM_FALLTHROUGH;
case TargetLowering::Expand:
if (ExpandNode(Node))
@@ -1623,6 +1595,7 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS,
MVT OpVT = LHS.getSimpleValueType();
ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
NeedInvert = false;
+ bool NeedSwap = false;
switch (TLI.getCondCodeAction(CCCode, OpVT)) {
default: llvm_unreachable("Unknown condition code action!");
case TargetLowering::Legal:
@@ -1630,23 +1603,37 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS,
break;
case TargetLowering::Expand: {
ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
- if (TLI.isCondCodeLegal(InvCC, OpVT)) {
+ if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
std::swap(LHS, RHS);
CC = DAG.getCondCode(InvCC);
return true;
}
+ // Swapping operands didn't work. Try inverting the condition.
+ InvCC = getSetCCInverse(CCCode, OpVT.isInteger());
+ if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
+ // If inverting the condition is not enough, try swapping operands
+ // on top of it.
+ InvCC = ISD::getSetCCSwappedOperands(InvCC);
+ NeedSwap = true;
+ }
+ if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
+ CC = DAG.getCondCode(InvCC);
+ NeedInvert = true;
+ if (NeedSwap)
+ std::swap(LHS, RHS);
+ return true;
+ }
+
ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
unsigned Opc = 0;
switch (CCCode) {
default: llvm_unreachable("Don't know how to expand this condition!");
case ISD::SETO:
- assert(TLI.getCondCodeAction(ISD::SETOEQ, OpVT)
- == TargetLowering::Legal
+ assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT)
&& "If SETO is expanded, SETOEQ must be legal!");
CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break;
case ISD::SETUO:
- assert(TLI.getCondCodeAction(ISD::SETUNE, OpVT)
- == TargetLowering::Legal
+ assert(TLI.isCondCodeLegal(ISD::SETUNE, OpVT)
&& "If SETUO is expanded, SETUNE must be legal!");
CC1 = ISD::SETUNE; CC2 = ISD::SETUNE; Opc = ISD::OR; break;
case ISD::SETOEQ:
@@ -1676,20 +1663,10 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS,
case ISD::SETGT:
case ISD::SETGE:
case ISD::SETLT:
- // We only support using the inverted operation, which is computed above
- // and not a different manner of supporting expanding these cases.
- llvm_unreachable("Don't know how to expand this condition!");
case ISD::SETNE:
case ISD::SETEQ:
- // Try inverting the result of the inverse condition.
- InvCC = CCCode == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ;
- if (TLI.isCondCodeLegal(InvCC, OpVT)) {
- CC = DAG.getCondCode(InvCC);
- NeedInvert = true;
- return true;
- }
- // If inverting the condition didn't work then we have no means to expand
- // the condition.
+ // If all combinations of inverting the condition and swapping operands
+ // didn't work then we have no means to expand the condition.
llvm_unreachable("Don't know how to expand this condition!");
}
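The Expand path above relies on two rewrites that hold for integer compares; a scalar sketch of both, where the second corresponds to the NeedInvert flag (illustrative only):

static bool lessThanViaSwappedOperands(int A, int B) {
  return B > A;            // (A < B)  ==  (B > A)
}

static bool lessThanViaInvertedPredicate(int A, int B) {
  return !(A >= B);        // (A < B)  ==  !(A >= B), with NeedInvert set
}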
@@ -2036,12 +2013,12 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
if (!CallInfo.second.getNode()) {
- DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump());
+ LLVM_DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump());
// It's a tailcall, return the chain (which is the DAG root).
return DAG.getRoot();
}
- DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump());
+ LLVM_DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump());
return CallInfo.first;
}
@@ -2327,10 +2304,10 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
EVT DestVT,
const SDLoc &dl) {
// TODO: Should any fast-math-flags be set for the created nodes?
- DEBUG(dbgs() << "Legalizing INT_TO_FP\n");
+ LLVM_DEBUG(dbgs() << "Legalizing INT_TO_FP\n");
if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) {
- DEBUG(dbgs() << "32-bit [signed|unsigned] integer to float/double "
- "expansion\n");
+ LLVM_DEBUG(dbgs() << "32-bit [signed|unsigned] integer to float/double "
+ "expansion\n");
// Get the stack frame index of a 8 byte buffer.
SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64);
@@ -2395,7 +2372,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
// and in all alternate rounding modes.
// TODO: Generalize this for use with other types.
if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) {
- DEBUG(dbgs() << "Converting unsigned i64 to f64\n");
+ LLVM_DEBUG(dbgs() << "Converting unsigned i64 to f64\n");
SDValue TwoP52 =
DAG.getConstant(UINT64_C(0x4330000000000000), dl, MVT::i64);
SDValue TwoP84PlusTwoP52 =
@@ -2418,7 +2395,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
// TODO: Generalize this for use with other types.
if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) {
- DEBUG(dbgs() << "Converting unsigned i64 to f32\n");
+ LLVM_DEBUG(dbgs() << "Converting unsigned i64 to f32\n");
// For unsigned conversions, convert them to signed conversions using the
// algorithm from the x86_64 __floatundidf in compiler_rt.
if (!isSigned) {
@@ -2853,7 +2830,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
}
bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
- DEBUG(dbgs() << "Trying to expand node\n");
+ LLVM_DEBUG(dbgs() << "Trying to expand node\n");
SmallVector<SDValue, 8> Results;
SDLoc dl(Node);
SDValue Tmp1, Tmp2, Tmp3, Tmp4;
@@ -3311,7 +3288,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
break;
case ISD::FP_TO_FP16:
- DEBUG(dbgs() << "Legalizing FP_TO_FP16\n");
+ LLVM_DEBUG(dbgs() << "Legalizing FP_TO_FP16\n");
if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) {
SDValue Op = Node->getOperand(0);
MVT SVT = Op.getSimpleValueType();
@@ -3525,15 +3502,25 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::USUBO: {
SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
- SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::UADDO ?
- ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
- LHS, RHS);
+ bool IsAdd = Node->getOpcode() == ISD::UADDO;
+ // If ADD/SUBCARRY is legal, use that instead.
+ unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY;
+ if (TLI.isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
+ SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
+ SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
+ { LHS, RHS, CarryIn });
+ Results.push_back(SDValue(NodeCarry.getNode(), 0));
+ Results.push_back(SDValue(NodeCarry.getNode(), 1));
+ break;
+ }
+
+ SDValue Sum = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
+ LHS.getValueType(), LHS, RHS);
Results.push_back(Sum);
EVT ResultType = Node->getValueType(1);
EVT SetCCType = getSetCCResultType(Node->getValueType(0));
- ISD::CondCode CC
- = Node->getOpcode() == ISD::UADDO ? ISD::SETULT : ISD::SETUGT;
+ ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
SDValue SetCC = DAG.getSetCC(dl, SetCCType, Sum, LHS, CC);
Results.push_back(DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType));
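A scalar sketch of the overflow tests this expansion falls back to when ADDCARRY/SUBCARRY are not legal, assuming 32-bit unsigned operands (illustrative only):

#include <cstdint>

static bool uaddOverflows(uint32_t A, uint32_t B, uint32_t &Sum) {
  Sum = A + B;
  return Sum < A;          // UADDO: carry out iff the wrapped sum is below A (SETULT)
}

static bool usubOverflows(uint32_t A, uint32_t B, uint32_t &Diff) {
  Diff = A - B;
  return Diff > A;         // USUBO: borrow iff the wrapped difference exceeds A (SETUGT)
}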
@@ -3684,8 +3671,17 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
unsigned EntrySize =
DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD);
- Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index,
- DAG.getConstant(EntrySize, dl, Index.getValueType()));
+ // For power-of-two jumptable entry sizes convert multiplication to a shift.
+ // This transformation needs to be done here since otherwise the MIPS
+ // backend will end up emitting a three instruction multiply sequence
+ // instead of a single shift and MSP430 will call a runtime function.
+ if (llvm::isPowerOf2_32(EntrySize))
+ Index = DAG.getNode(
+ ISD::SHL, dl, Index.getValueType(), Index,
+ DAG.getConstant(llvm::Log2_32(EntrySize), dl, Index.getValueType()));
+ else
+ Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index,
+ DAG.getConstant(EntrySize, dl, Index.getValueType()));
SDValue Addr = DAG.getNode(ISD::ADD, dl, Index.getValueType(),
Index, Table);
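A scalar sketch of the strength reduction applied above, assuming a non-zero power-of-two entry size (illustrative only; __builtin_ctzll is the GCC/Clang builtin used to take log2):

#include <cstdint>

static uint64_t jumpTableByteOffset(uint64_t Index, uint64_t EntrySize) {
  // For power-of-two EntrySize, Index * EntrySize == Index << log2(EntrySize).
  return Index << __builtin_ctzll(EntrySize);
}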
@@ -3701,7 +3697,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr,
TLI.getPICJumpTableRelocBase(Table, DAG));
}
- Tmp1 = DAG.getNode(ISD::BRIND, dl, MVT::Other, LD.getValue(1), Addr);
+
+ Tmp1 = TLI.expandIndirectJTBranch(dl, LD.getValue(1), Addr, DAG);
Results.push_back(Tmp1);
break;
}
@@ -3720,7 +3717,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
if (Tmp2.isUndef() ||
(Tmp2.getOpcode() == ISD::AND &&
isa<ConstantSDNode>(Tmp2.getOperand(1)) &&
- dyn_cast<ConstantSDNode>(Tmp2.getOperand(1))->getZExtValue() == 1))
+ cast<ConstantSDNode>(Tmp2.getOperand(1))->getZExtValue() == 1))
Tmp3 = Tmp2;
else
Tmp3 = DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2,
@@ -3759,7 +3756,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// illegal; expand it into a SELECT_CC.
EVT VT = Node->getValueType(0);
int TrueValue;
- switch (TLI.getBooleanContents(Tmp1->getValueType(0))) {
+ switch (TLI.getBooleanContents(Tmp1.getValueType())) {
case TargetLowering::ZeroOrOneBooleanContent:
case TargetLowering::UndefinedBooleanContent:
TrueValue = 1;
@@ -3784,7 +3781,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SDValue CC = Node->getOperand(4);
ISD::CondCode CCOp = cast<CondCodeSDNode>(CC)->get();
- if (TLI.isCondCodeLegal(CCOp, Tmp1.getSimpleValueType())) {
+ if (TLI.isCondCodeLegalOrCustom(CCOp, Tmp1.getSimpleValueType())) {
// If the condition code is legal, then we need to expand this
// node using SETCC and SELECT.
EVT CmpVT = Tmp1.getValueType();
@@ -3805,7 +3802,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// version (or vice versa).
ISD::CondCode InvCC = ISD::getSetCCInverse(CCOp,
Tmp1.getValueType().isInteger());
- if (TLI.isCondCodeLegal(InvCC, Tmp1.getSimpleValueType())) {
+ if (TLI.isCondCodeLegalOrCustom(InvCC, Tmp1.getSimpleValueType())) {
// Use the new condition code and swap true and false
Legalized = true;
Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp4, Tmp3, InvCC);
@@ -3813,7 +3810,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// If The inverse is not legal, then try to swap the arguments using
// the inverse condition code.
ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InvCC);
- if (TLI.isCondCodeLegal(SwapInvCC, Tmp1.getSimpleValueType())) {
+ if (TLI.isCondCodeLegalOrCustom(SwapInvCC, Tmp1.getSimpleValueType())) {
// The swapped inverse condition is legal, so swap true and false,
// lhs and rhs.
Legalized = true;
@@ -3906,6 +3903,46 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
ReplaceNode(SDValue(Node, 0), Result);
break;
}
+ case ISD::ROTL:
+ case ISD::ROTR: {
+ bool IsLeft = Node->getOpcode() == ISD::ROTL;
+ SDValue Op0 = Node->getOperand(0), Op1 = Node->getOperand(1);
+ EVT ResVT = Node->getValueType(0);
+ EVT OpVT = Op0.getValueType();
+ assert(OpVT == ResVT &&
+ "The result and the operand types of rotate should match");
+ EVT ShVT = Op1.getValueType();
+ SDValue Width = DAG.getConstant(OpVT.getScalarSizeInBits(), dl, ShVT);
+
+ // If a rotate in the other direction is legal, use it.
+ unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
+ if (TLI.isOperationLegal(RevRot, ResVT)) {
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, ShVT, Width, Op1);
+ Results.push_back(DAG.getNode(RevRot, dl, ResVT, Op0, Sub));
+ break;
+ }
+
+ // Otherwise,
+ // (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and w-c, w-1)))
+ // (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and w-c, w-1)))
+ //
+ assert(isPowerOf2_32(OpVT.getScalarSizeInBits()) &&
+ "Expecting the type bitwidth to be a power of 2");
+ unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
+ unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
+ SDValue Width1 = DAG.getNode(ISD::SUB, dl, ShVT,
+ Width, DAG.getConstant(1, dl, ShVT));
+ SDValue NegOp1 = DAG.getNode(ISD::SUB, dl, ShVT, Width, Op1);
+ SDValue And0 = DAG.getNode(ISD::AND, dl, ShVT, Op1, Width1);
+ SDValue And1 = DAG.getNode(ISD::AND, dl, ShVT, NegOp1, Width1);
+
+ SDValue Or = DAG.getNode(ISD::OR, dl, ResVT,
+ DAG.getNode(ShOpc, dl, ResVT, Op0, And0),
+ DAG.getNode(HsOpc, dl, ResVT, Op0, And1));
+ Results.push_back(Or);
+ break;
+ }
+
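A scalar sketch of the shift-and-or expansion above for a 32-bit rotate-left (illustrative only); masking both shift amounts with w-1 keeps them in range, so the expression stays well defined when the rotate amount is 0 or a multiple of the width:

#include <cstdint>

static uint32_t rotlExpanded(uint32_t X, uint32_t C) {
  const uint32_t W = 32;
  return (X << (C & (W - 1))) | (X >> ((W - C) & (W - 1)));
}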
case ISD::GLOBAL_OFFSET_TABLE:
case ISD::GlobalAddress:
case ISD::GlobalTLSAddress:
@@ -3921,19 +3958,21 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// Replace the original node with the legalized result.
if (Results.empty()) {
- DEBUG(dbgs() << "Cannot expand node\n");
+ LLVM_DEBUG(dbgs() << "Cannot expand node\n");
return false;
}
- DEBUG(dbgs() << "Succesfully expanded node\n");
+ LLVM_DEBUG(dbgs() << "Succesfully expanded node\n");
ReplaceNode(Node, Results.data());
return true;
}
void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
- DEBUG(dbgs() << "Trying to convert node to libcall\n");
+ LLVM_DEBUG(dbgs() << "Trying to convert node to libcall\n");
SmallVector<SDValue, 8> Results;
SDLoc dl(Node);
+ // FIXME: Check flags on the node to see if we can use a finite call.
+ bool CanUseFiniteLibCall = TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath;
unsigned Opc = Node->getOpcode();
switch (Opc) {
case ISD::ATOMIC_FENCE: {
@@ -3962,6 +4001,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_CLR:
case ISD::ATOMIC_LOAD_OR:
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_NAND:
@@ -4028,33 +4068,68 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
break;
case ISD::FLOG:
case ISD::STRICT_FLOG:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
- RTLIB::LOG_F80, RTLIB::LOG_F128,
- RTLIB::LOG_PPCF128));
+ if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log_finite))
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_FINITE_F32,
+ RTLIB::LOG_FINITE_F64,
+ RTLIB::LOG_FINITE_F80,
+ RTLIB::LOG_FINITE_F128,
+ RTLIB::LOG_FINITE_PPCF128));
+ else
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
+ RTLIB::LOG_F80, RTLIB::LOG_F128,
+ RTLIB::LOG_PPCF128));
break;
case ISD::FLOG2:
case ISD::STRICT_FLOG2:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64,
- RTLIB::LOG2_F80, RTLIB::LOG2_F128,
- RTLIB::LOG2_PPCF128));
+ if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log2_finite))
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_FINITE_F32,
+ RTLIB::LOG2_FINITE_F64,
+ RTLIB::LOG2_FINITE_F80,
+ RTLIB::LOG2_FINITE_F128,
+ RTLIB::LOG2_FINITE_PPCF128));
+ else
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80, RTLIB::LOG2_F128,
+ RTLIB::LOG2_PPCF128));
break;
case ISD::FLOG10:
case ISD::STRICT_FLOG10:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64,
- RTLIB::LOG10_F80, RTLIB::LOG10_F128,
- RTLIB::LOG10_PPCF128));
+ if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log10_finite))
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_FINITE_F32,
+ RTLIB::LOG10_FINITE_F64,
+ RTLIB::LOG10_FINITE_F80,
+ RTLIB::LOG10_FINITE_F128,
+ RTLIB::LOG10_FINITE_PPCF128));
+ else
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80, RTLIB::LOG10_F128,
+ RTLIB::LOG10_PPCF128));
break;
case ISD::FEXP:
case ISD::STRICT_FEXP:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64,
- RTLIB::EXP_F80, RTLIB::EXP_F128,
- RTLIB::EXP_PPCF128));
+ if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp_finite))
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_FINITE_F32,
+ RTLIB::EXP_FINITE_F64,
+ RTLIB::EXP_FINITE_F80,
+ RTLIB::EXP_FINITE_F128,
+ RTLIB::EXP_FINITE_PPCF128));
+ else
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64,
+ RTLIB::EXP_F80, RTLIB::EXP_F128,
+ RTLIB::EXP_PPCF128));
break;
case ISD::FEXP2:
case ISD::STRICT_FEXP2:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64,
- RTLIB::EXP2_F80, RTLIB::EXP2_F128,
- RTLIB::EXP2_PPCF128));
+ if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp2_finite))
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_FINITE_F32,
+ RTLIB::EXP2_FINITE_F64,
+ RTLIB::EXP2_FINITE_F80,
+ RTLIB::EXP2_FINITE_F128,
+ RTLIB::EXP2_FINITE_PPCF128));
+ else
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80, RTLIB::EXP2_F128,
+ RTLIB::EXP2_PPCF128));
break;
case ISD::FTRUNC:
Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
@@ -4100,9 +4175,16 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
break;
case ISD::FPOW:
case ISD::STRICT_FPOW:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64,
- RTLIB::POW_F80, RTLIB::POW_F128,
- RTLIB::POW_PPCF128));
+ if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_pow_finite))
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_FINITE_F32,
+ RTLIB::POW_FINITE_F64,
+ RTLIB::POW_FINITE_F80,
+ RTLIB::POW_FINITE_F128,
+ RTLIB::POW_FINITE_PPCF128));
+ else
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64,
+ RTLIB::POW_F80, RTLIB::POW_F128,
+ RTLIB::POW_PPCF128));
break;
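A sketch of the substitution being made in these cases, assuming a C library that exports the glibc-style *_finite entry points; the __exp_finite declaration below is written out by hand only for the example and is not portable to every libm:

#include <cmath>

extern "C" double __exp_finite(double); // glibc-style finite entry point (assumed)

static double expWithFiniteMathOnly(double X, bool NoNaNsAndNoInfs) {
  // With no-NaNs/no-Infs guarantees the cheaper finite variant can be called;
  // otherwise fall back to the standard libm routine.
  return NoNaNsAndNoInfs ? __exp_finite(X) : std::exp(X);
}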
case ISD::FDIV:
Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
@@ -4186,10 +4268,10 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
// Replace the original node with the legalized result.
if (!Results.empty()) {
- DEBUG(dbgs() << "Successfully converted node to libcall\n");
+ LLVM_DEBUG(dbgs() << "Successfully converted node to libcall\n");
ReplaceNode(Node, Results.data());
} else
- DEBUG(dbgs() << "Could not convert node to libcall\n");
+ LLVM_DEBUG(dbgs() << "Could not convert node to libcall\n");
}
// Determine the vector type to use in place of an original scalar element when
@@ -4203,7 +4285,7 @@ static MVT getPromotedVectorElementType(const TargetLowering &TLI,
}
void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
- DEBUG(dbgs() << "Trying to promote node\n");
+ LLVM_DEBUG(dbgs() << "Trying to promote node\n");
SmallVector<SDValue, 8> Results;
MVT OVT = Node->getSimpleValueType(0);
if (Node->getOpcode() == ISD::UINT_TO_FP ||
@@ -4256,7 +4338,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
ISD::SRL, dl, NVT, Tmp1,
DAG.getConstant(DiffBits, dl,
TLI.getShiftAmountTy(NVT, DAG.getDataLayout())));
- Results.push_back(Tmp1);
+
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1));
break;
}
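A scalar sketch of the truncation added above (illustrative only): byte-swapping an i16 value inside an i32 register leaves the bytes in bits [31:16], so the result is shifted down by DiffBits (16) and then truncated back to the original width. __builtin_bswap32 is the GCC/Clang builtin standing in for the promoted BSWAP node.

#include <cstdint>

static uint16_t bswap16ViaI32(uint16_t X) {
  uint32_t Wide = __builtin_bswap32(X); // result lands in the high half
  return static_cast<uint16_t>(Wide >> 16);
}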
case ISD::FP_TO_UINT:
@@ -4640,10 +4723,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
// Replace the original node with the legalized result.
if (!Results.empty()) {
- DEBUG(dbgs() << "Successfully promoted node\n");
+ LLVM_DEBUG(dbgs() << "Successfully promoted node\n");
ReplaceNode(Node, Results.data());
} else
- DEBUG(dbgs() << "Could not promote node\n");
+ LLVM_DEBUG(dbgs() << "Could not promote node\n");
}
/// This is the entry point for the file.
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index e28a3aa47ca3..b0ae1e0399fb 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -47,8 +47,8 @@ static RTLIB::Libcall GetFPLibCall(EVT VT,
//===----------------------------------------------------------------------===//
bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
- DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
SDValue R = SDValue();
switch (N->getOpcode()) {
@@ -738,8 +738,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
//===----------------------------------------------------------------------===//
bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
- DEBUG(dbgs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
SDValue Res = SDValue();
switch (N->getOpcode()) {
@@ -1039,7 +1039,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
/// have invalid operands or may have other results that need promotion, we just
/// know that (at least) one result needs expansion.
void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
- DEBUG(dbgs() << "Expand float result: "; N->dump(&DAG); dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Expand float result: "; N->dump(&DAG); dbgs() << "\n");
SDValue Lo, Hi;
Lo = Hi = SDValue();
@@ -1538,7 +1538,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
/// types of the node are known to be legal, but other operands of the node may
/// need promotion or expansion as well as the specified one.
bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
- DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n");
SDValue Res = SDValue();
// See if the target wants to custom expand this node.
@@ -1658,18 +1658,6 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) {
EVT RVT = N->getValueType(0);
SDLoc dl(N);
- // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
- // PPC (the libcall is not available). FIXME: Do this in a less hacky way.
- if (RVT == MVT::i32) {
- assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
- "Logic only correct for ppcf128!");
- SDValue Res = DAG.getNode(ISD::FP_ROUND_INREG, dl, MVT::ppcf128,
- N->getOperand(0), DAG.getValueType(MVT::f64));
- Res = DAG.getNode(ISD::FP_ROUND, dl, MVT::f64, Res,
- DAG.getIntPtrConstant(1, dl));
- return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
- }
-
RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), false, dl).first;
@@ -1679,31 +1667,6 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
EVT RVT = N->getValueType(0);
SDLoc dl(N);
- // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
- // PPC (the libcall is not available). FIXME: Do this in a less hacky way.
- if (RVT == MVT::i32) {
- assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
- "Logic only correct for ppcf128!");
- const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
- APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
- SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128);
- // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
- // FIXME: generated code sucks.
- // TODO: Are there fast-math-flags to propagate to this FSUB?
- return DAG.getSelectCC(dl, N->getOperand(0), Tmp,
- DAG.getNode(ISD::ADD, dl, MVT::i32,
- DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32,
- DAG.getNode(ISD::FSUB, dl,
- MVT::ppcf128,
- N->getOperand(0),
- Tmp)),
- DAG.getConstant(0x80000000, dl,
- MVT::i32)),
- DAG.getNode(ISD::FP_TO_SINT, dl,
- MVT::i32, N->getOperand(0)),
- ISD::SETGE);
- }
-
RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
return TLI.makeLibCall(DAG, LC, N->getValueType(0), N->getOperand(0),
@@ -2139,13 +2102,12 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_LOAD(SDNode *N) {
// Load the value as an integer value with the same number of bits.
EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
- auto MMOFlags =
- L->getMemOperand()->getFlags() &
- ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
SDValue newL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), IVT,
SDLoc(N), L->getChain(), L->getBasePtr(),
L->getOffset(), L->getPointerInfo(), IVT,
- L->getAlignment(), MMOFlags, L->getAAInfo());
+ L->getAlignment(),
+ L->getMemOperand()->getFlags(),
+ L->getAAInfo());
// Legalize the chain result by replacing uses of the old value chain with the
// new one
ReplaceValueWith(SDValue(N, 1), newL.getValue(1));
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 29f0bb475b08..63a1ea13a5f5 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -36,12 +36,13 @@ using namespace llvm;
/// may also have invalid operands or may have other results that need
/// expansion, we just know that (at least) one result needs promotion.
void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
- DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG); dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG);
+ dbgs() << "\n");
SDValue Res = SDValue();
// See if the target wants to custom expand this node.
if (CustomLowerNode(N, N->getValueType(ResNo), true)) {
- DEBUG(dbgs() << "Node has been custom expanded, done\n");
+ LLVM_DEBUG(dbgs() << "Node has been custom expanded, done\n");
return;
}
@@ -146,6 +147,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_CLR:
case ISD::ATOMIC_LOAD_OR:
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_NAND:
@@ -501,7 +503,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) {
SDLoc dl(N);
SDValue Ops[] = {N->getChain(), ExtSrc0, N->getMask(), N->getBasePtr(),
- N->getIndex()};
+ N->getIndex(), N->getScale() };
SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other),
N->getMemoryVT(), dl, Ops,
N->getMemOperand());
@@ -586,43 +588,39 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
- EVT SVT = getSetCCResultType(N->getOperand(0).getValueType());
-
+ EVT InVT = N->getOperand(0).getValueType();
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- // Only use the result of getSetCCResultType if it is legal,
- // otherwise just use the promoted result type (NVT).
- if (!TLI.isTypeLegal(SVT))
- SVT = NVT;
+ EVT SVT = getSetCCResultType(InVT);
+
+ // If we got back a type that needs to be promoted, this likely means the
+ // input type also needs to be promoted. So get the promoted type for
+ // the input and try the query again.
+ if (getTypeAction(SVT) == TargetLowering::TypePromoteInteger) {
+ if (getTypeAction(InVT) == TargetLowering::TypePromoteInteger) {
+ InVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
+ SVT = getSetCCResultType(InVT);
+ } else {
+ // Input type isn't promoted, just use the default promoted type.
+ SVT = NVT;
+ }
+ }
SDLoc dl(N);
assert(SVT.isVector() == N->getOperand(0).getValueType().isVector() &&
"Vector compare must return a vector result!");
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
- if (LHS.getValueType() != RHS.getValueType()) {
- if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger &&
- !LHS.getValueType().isVector())
- LHS = GetPromotedInteger(LHS);
- if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger &&
- !RHS.getValueType().isVector())
- RHS = GetPromotedInteger(RHS);
- }
-
// Get the SETCC result using the canonical SETCC type.
- SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, LHS, RHS,
- N->getOperand(2));
+ SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0),
+ N->getOperand(1), N->getOperand(2));
// Convert to the expected type.
return DAG.getSExtOrTrunc(SetCC, dl, NVT);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
- SDValue LHS = N->getOperand(0);
+ SDValue LHS = GetPromotedInteger(N->getOperand(0));
SDValue RHS = N->getOperand(1);
- if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger)
- LHS = GetPromotedInteger(LHS);
if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
RHS = ZExtPromotedInteger(RHS);
return DAG.getNode(ISD::SHL, SDLoc(N), LHS.getValueType(), LHS, RHS);
@@ -661,22 +659,18 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
// The input value must be properly sign extended.
- if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger)
- LHS = SExtPromotedInteger(LHS);
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = N->getOperand(1);
if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
RHS = ZExtPromotedInteger(RHS);
return DAG.getNode(ISD::SRA, SDLoc(N), LHS.getValueType(), LHS, RHS);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) {
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
// The input value must be properly zero extended.
- if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger)
- LHS = ZExtPromotedInteger(LHS);
+ SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = N->getOperand(1);
if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
RHS = ZExtPromotedInteger(RHS);
return DAG.getNode(ISD::SRL, SDLoc(N), LHS.getValueType(), LHS, RHS);
@@ -904,11 +898,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
/// result types of the node are known to be legal, but other operands of the
/// node may need promotion or expansion as well as the specified one.
bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
- DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG); dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG);
+ dbgs() << "\n");
SDValue Res = SDValue();
if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) {
- DEBUG(dbgs() << "Node has been custom lowered, done\n");
+ LLVM_DEBUG(dbgs() << "Node has been custom lowered, done\n");
return false;
}
@@ -1001,11 +996,11 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
// than the width of NewLHS/NewRHS, we can avoid inserting a real truncate
// instruction, which would eventually be redundant.
unsigned OpLEffectiveBits =
- OpL.getValueSizeInBits() - DAG.ComputeNumSignBits(OpL) + 1;
+ OpL.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpL) + 1;
unsigned OpREffectiveBits =
- OpR.getValueSizeInBits() - DAG.ComputeNumSignBits(OpR) + 1;
- if (OpLEffectiveBits <= NewLHS.getValueSizeInBits() &&
- OpREffectiveBits <= NewRHS.getValueSizeInBits()) {
+ OpR.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpR) + 1;
+ if (OpLEffectiveBits <= NewLHS.getScalarValueSizeInBits() &&
+ OpREffectiveBits <= NewRHS.getScalarValueSizeInBits()) {
NewLHS = OpL;
NewRHS = OpR;
} else {
@@ -1356,7 +1351,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo) {
/// have invalid operands or may have other results that need promotion, we just
/// know that (at least) one result needs expansion.
void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
- DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG); dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG);
+ dbgs() << "\n");
SDValue Lo, Hi;
Lo = Hi = SDValue();
@@ -1413,6 +1409,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_CLR:
case ISD::ATOMIC_LOAD_OR:
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_NAND:
@@ -2893,7 +2890,8 @@ void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N,
/// result types of the node are known to be legal, but other operands of the
/// node may need promotion or expansion as well as the specified one.
bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
- DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG); dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG);
+ dbgs() << "\n");
SDValue Res = SDValue();
if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
@@ -2915,7 +2913,6 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break;
case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break;
case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break;
- case ISD::SETCCE: Res = ExpandIntOp_SETCCE(N); break;
case ISD::SETCCCARRY: Res = ExpandIntOp_SETCCCARRY(N); break;
case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break;
case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break;
@@ -3051,15 +3048,14 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
return;
}
- // Lower with SETCCE or SETCCCARRY if the target supports it.
+ // Lower with SETCCCARRY if the target supports it.
EVT HiVT = LHSHi.getValueType();
EVT ExpandVT = TLI.getTypeToExpandTo(*DAG.getContext(), HiVT);
bool HasSETCCCARRY = TLI.isOperationLegalOrCustom(ISD::SETCCCARRY, ExpandVT);
// FIXME: Make all targets support this, then remove the other lowering.
- if (HasSETCCCARRY ||
- TLI.getOperationAction(ISD::SETCCE, ExpandVT) == TargetLowering::Custom) {
- // SETCCE/SETCCCARRY can detect < and >= directly. For > and <=, flip
+ if (HasSETCCCARRY) {
+ // SETCCCARRY can detect < and >= directly. For > and <=, flip
// operands and condition code.
bool FlipOperands = false;
switch (CCCode) {
@@ -3074,17 +3070,15 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
std::swap(LHSHi, RHSHi);
}
// Perform a wide subtraction, feeding the carry from the low part into
- // SETCCE/SETCCCARRY. The SETCCE/SETCCCARRY operation is essentially
- // looking at the high part of the result of LHS - RHS. It is negative
- // iff LHS < RHS. It is zero or positive iff LHS >= RHS.
+ // SETCCCARRY. The SETCCCARRY operation is essentially looking at the high
+ // part of the result of LHS - RHS. It is negative iff LHS < RHS. It is
+ // zero or positive iff LHS >= RHS.
EVT LoVT = LHSLo.getValueType();
- SDVTList VTList = DAG.getVTList(
- LoVT, HasSETCCCARRY ? getSetCCResultType(LoVT) : MVT::Glue);
- SDValue LowCmp = DAG.getNode(HasSETCCCARRY ? ISD::USUBO : ISD::SUBC, dl,
- VTList, LHSLo, RHSLo);
- SDValue Res = DAG.getNode(HasSETCCCARRY ? ISD::SETCCCARRY : ISD::SETCCE, dl,
- getSetCCResultType(HiVT), LHSHi, RHSHi,
- LowCmp.getValue(1), DAG.getCondCode(CCCode));
+ SDVTList VTList = DAG.getVTList(LoVT, getSetCCResultType(LoVT));
+ SDValue LowCmp = DAG.getNode(ISD::USUBO, dl, VTList, LHSLo, RHSLo);
+ SDValue Res = DAG.getNode(ISD::SETCCCARRY, dl, getSetCCResultType(HiVT),
+ LHSHi, RHSHi, LowCmp.getValue(1),
+ DAG.getCondCode(CCCode));
NewLHS = Res;
NewRHS = SDValue();
return;
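[Editor's note] The SETCCCARRY lowering relies on the fact that the high half of LHS - RHS, computed with the borrow from the low half fed in, tells you how the full-width values compare. A hedged scalar sketch of that reasoning for an unsigned 128-bit compare built from 64-bit halves (the helper name is illustrative, not part of the patch):

    #include <cstdint>

    // "LHS < RHS" for 128-bit unsigned values given as {Lo, Hi} halves: take the
    // borrow of the low subtraction into the high part; the final borrow is the
    // bit that the USUBO + SETCCCARRY pair inspects.
    static bool ult128(uint64_t LoL, uint64_t HiL, uint64_t LoR, uint64_t HiR) {
      bool BorrowLo = LoL < LoR;                    // carry out of the low USUBO
      return HiL < HiR || (HiL == HiR && BorrowLo); // borrow out of the high part
    }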
@@ -3152,24 +3146,6 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) {
DAG.UpdateNodeOperands(N, NewLHS, NewRHS, DAG.getCondCode(CCCode)), 0);
}
-SDValue DAGTypeLegalizer::ExpandIntOp_SETCCE(SDNode *N) {
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
- SDValue Carry = N->getOperand(2);
- SDValue Cond = N->getOperand(3);
- SDLoc dl = SDLoc(N);
-
- SDValue LHSLo, LHSHi, RHSLo, RHSHi;
- GetExpandedInteger(LHS, LHSLo, LHSHi);
- GetExpandedInteger(RHS, RHSLo, RHSHi);
-
- // Expand to a SUBE for the low part and a smaller SETCCE for the high.
- SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), MVT::Glue);
- SDValue LowCmp = DAG.getNode(ISD::SUBE, dl, VTList, LHSLo, RHSLo, Carry);
- return DAG.getNode(ISD::SETCCE, dl, N->getValueType(0), LHSHi, RHSHi,
- LowCmp.getValue(1), Cond);
-}
-
SDValue DAGTypeLegalizer::ExpandIntOp_SETCCCARRY(SDNode *N) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
@@ -3497,21 +3473,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
assert(NumElem * NumOperands == NumOutElem &&
"Unexpected number of elements");
- // If the input type is legal and we can promote it to a legal type with the
- // same element size, go ahead do that to create a new concat.
- if (getTypeAction(N->getOperand(0).getValueType()) ==
- TargetLowering::TypeLegal) {
- EVT InPromotedTy = EVT::getVectorVT(*DAG.getContext(), OutElemTy, NumElem);
- if (TLI.isTypeLegal(InPromotedTy)) {
- SmallVector<SDValue, 8> Ops(NumOperands);
- for (unsigned i = 0; i < NumOperands; ++i) {
- Ops[i] = DAG.getNode(ISD::ANY_EXTEND, dl, InPromotedTy,
- N->getOperand(i));
- }
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, NOutVT, Ops);
- }
- }
-
// Take the elements from the first vector.
SmallVector<SDValue, 8> Ops(NumOutElem);
for (unsigned i = 0; i < NumOperands; ++i) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 4438ee7878b8..a9f144c06e9a 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -84,9 +84,11 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
SDValue Res(&Node, i);
EVT VT = Res.getValueType();
bool Failed = false;
+ // Look up the id for Res without creating a new map entry.
+ auto ResId = (ValueToIdMap.count(Res)) ? ValueToIdMap[Res] : 0;
unsigned Mapped = 0;
- if (ReplacedValues.find(Res) != ReplacedValues.end()) {
+ if (ResId && (ReplacedValues.find(ResId) != ReplacedValues.end())) {
Mapped |= 1;
// Check that remapped values are only used by nodes marked NewNode.
for (SDNode::use_iterator UI = Node.use_begin(), UE = Node.use_end();
@@ -97,30 +99,32 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
// Check that the final result of applying ReplacedValues is not
// marked NewNode.
- SDValue NewVal = ReplacedValues[Res];
- DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(NewVal);
+ auto NewValId = ReplacedValues[ResId];
+ auto I = ReplacedValues.find(NewValId);
while (I != ReplacedValues.end()) {
- NewVal = I->second;
- I = ReplacedValues.find(NewVal);
+ NewValId = I->second;
+ I = ReplacedValues.find(NewValId);
}
+ SDValue NewVal = getSDValue(NewValId);
+ (void)NewVal;
assert(NewVal.getNode()->getNodeId() != NewNode &&
"ReplacedValues maps to a new node!");
}
- if (PromotedIntegers.find(Res) != PromotedIntegers.end())
+ if (ResId && PromotedIntegers.find(ResId) != PromotedIntegers.end())
Mapped |= 2;
- if (SoftenedFloats.find(Res) != SoftenedFloats.end())
+ if (ResId && SoftenedFloats.find(ResId) != SoftenedFloats.end())
Mapped |= 4;
- if (ScalarizedVectors.find(Res) != ScalarizedVectors.end())
+ if (ResId && ScalarizedVectors.find(ResId) != ScalarizedVectors.end())
Mapped |= 8;
- if (ExpandedIntegers.find(Res) != ExpandedIntegers.end())
+ if (ResId && ExpandedIntegers.find(ResId) != ExpandedIntegers.end())
Mapped |= 16;
- if (ExpandedFloats.find(Res) != ExpandedFloats.end())
+ if (ResId && ExpandedFloats.find(ResId) != ExpandedFloats.end())
Mapped |= 32;
- if (SplitVectors.find(Res) != SplitVectors.end())
+ if (ResId && SplitVectors.find(ResId) != SplitVectors.end())
Mapped |= 64;
- if (WidenedVectors.find(Res) != WidenedVectors.end())
+ if (ResId && WidenedVectors.find(ResId) != WidenedVectors.end())
Mapped |= 128;
- if (PromotedFloats.find(Res) != PromotedFloats.end())
+ if (ResId && PromotedFloats.find(ResId) != PromotedFloats.end())
Mapped |= 256;
if (Node.getNodeId() != Processed) {
@@ -224,9 +228,9 @@ bool DAGTypeLegalizer::run() {
assert(N->getNodeId() == ReadyToProcess &&
"Node should be ready if on worklist!");
- DEBUG(dbgs() << "Legalizing node: "; N->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "Legalizing node: "; N->dump(&DAG));
if (IgnoreNodeResults(N)) {
- DEBUG(dbgs() << "Ignoring node results\n");
+ LLVM_DEBUG(dbgs() << "Ignoring node results\n");
goto ScanOperands;
}
@@ -234,11 +238,11 @@ bool DAGTypeLegalizer::run() {
// types are illegal.
for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) {
EVT ResultVT = N->getValueType(i);
- DEBUG(dbgs() << "Analyzing result type: " <<
- ResultVT.getEVTString() << "\n");
+ LLVM_DEBUG(dbgs() << "Analyzing result type: " << ResultVT.getEVTString()
+ << "\n");
switch (getTypeAction(ResultVT)) {
case TargetLowering::TypeLegal:
- DEBUG(dbgs() << "Legal result type\n");
+ LLVM_DEBUG(dbgs() << "Legal result type\n");
break;
// The following calls must take care of *all* of the node's results,
// not just the illegal result they were passed (this includes results
@@ -296,11 +300,11 @@ ScanOperands:
continue;
const auto Op = N->getOperand(i);
- DEBUG(dbgs() << "Analyzing operand: "; Op.dump(&DAG));
+ LLVM_DEBUG(dbgs() << "Analyzing operand: "; Op.dump(&DAG));
EVT OpVT = Op.getValueType();
switch (getTypeAction(OpVT)) {
case TargetLowering::TypeLegal:
- DEBUG(dbgs() << "Legal operand\n");
+ LLVM_DEBUG(dbgs() << "Legal operand\n");
continue;
// The following calls must either replace all of the node's results
// using ReplaceValueWith, and return "false"; or update the node's
@@ -370,7 +374,8 @@ ScanOperands:
}
if (i == NumOperands) {
- DEBUG(dbgs() << "Legally typed node: "; N->dump(&DAG); dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Legally typed node: "; N->dump(&DAG);
+ dbgs() << "\n");
}
}
NodeDone:
@@ -490,9 +495,6 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) {
if (N->getNodeId() != NewNode && N->getNodeId() != Unanalyzed)
return N;
- // Remove any stale map entries.
- ExpungeNode(N);
-
// Okay, we know that this node is new. Recursively walk all of its operands
// to see if they are new also. The depth of this walk is bounded by the size
// of the new tree that was constructed (usually 2-3 nodes), so we don't worry
@@ -543,7 +545,6 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) {
// to remap the operands, since they are the same as the operands we
// remapped above.
N = M;
- ExpungeNode(N);
}
}
@@ -564,100 +565,25 @@ void DAGTypeLegalizer::AnalyzeNewValue(SDValue &Val) {
RemapValue(Val);
}
-/// If N has a bogus mapping in ReplacedValues, eliminate it.
-/// This can occur when a node is deleted then reallocated as a new node -
-/// the mapping in ReplacedValues applies to the deleted node, not the new
-/// one.
-/// The only map that can have a deleted node as a source is ReplacedValues.
-/// Other maps can have deleted nodes as targets, but since their looked-up
-/// values are always immediately remapped using RemapValue, resulting in a
-/// not-deleted node, this is harmless as long as ReplacedValues/RemapValue
-/// always performs correct mappings. In order to keep the mapping correct,
-/// ExpungeNode should be called on any new nodes *before* adding them as
-/// either source or target to ReplacedValues (which typically means calling
-/// Expunge when a new node is first seen, since it may no longer be marked
-/// NewNode by the time it is added to ReplacedValues).
-void DAGTypeLegalizer::ExpungeNode(SDNode *N) {
- if (N->getNodeId() != NewNode)
- return;
-
- // If N is not remapped by ReplacedValues then there is nothing to do.
- unsigned i, e;
- for (i = 0, e = N->getNumValues(); i != e; ++i)
- if (ReplacedValues.find(SDValue(N, i)) != ReplacedValues.end())
- break;
-
- if (i == e)
- return;
-
- // Remove N from all maps - this is expensive but rare.
-
- for (DenseMap<SDValue, SDValue>::iterator I = PromotedIntegers.begin(),
- E = PromotedIntegers.end(); I != E; ++I) {
- assert(I->first.getNode() != N);
- RemapValue(I->second);
- }
-
- for (DenseMap<SDValue, SDValue>::iterator I = SoftenedFloats.begin(),
- E = SoftenedFloats.end(); I != E; ++I) {
- assert(I->first.getNode() != N);
- RemapValue(I->second);
- }
-
- for (DenseMap<SDValue, SDValue>::iterator I = ScalarizedVectors.begin(),
- E = ScalarizedVectors.end(); I != E; ++I) {
- assert(I->first.getNode() != N);
- RemapValue(I->second);
- }
-
- for (DenseMap<SDValue, SDValue>::iterator I = WidenedVectors.begin(),
- E = WidenedVectors.end(); I != E; ++I) {
- assert(I->first.getNode() != N);
- RemapValue(I->second);
- }
-
- for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
- I = ExpandedIntegers.begin(), E = ExpandedIntegers.end(); I != E; ++I){
- assert(I->first.getNode() != N);
- RemapValue(I->second.first);
- RemapValue(I->second.second);
- }
-
- for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
- I = ExpandedFloats.begin(), E = ExpandedFloats.end(); I != E; ++I) {
- assert(I->first.getNode() != N);
- RemapValue(I->second.first);
- RemapValue(I->second.second);
- }
-
- for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
- I = SplitVectors.begin(), E = SplitVectors.end(); I != E; ++I) {
- assert(I->first.getNode() != N);
- RemapValue(I->second.first);
- RemapValue(I->second.second);
- }
-
- for (DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.begin(),
- E = ReplacedValues.end(); I != E; ++I)
- RemapValue(I->second);
-
- for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
- ReplacedValues.erase(SDValue(N, i));
-}
-
/// If the specified value was already legalized to another value,
/// replace it by that value.
-void DAGTypeLegalizer::RemapValue(SDValue &N) {
- DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(N);
+void DAGTypeLegalizer::RemapValue(SDValue &V) {
+ auto Id = getTableId(V);
+ V = getSDValue(Id);
+}
+
+void DAGTypeLegalizer::RemapId(TableId &Id) {
+ auto I = ReplacedValues.find(Id);
if (I != ReplacedValues.end()) {
+ assert(Id != I->second && "Id is mapped to itself.");
// Use path compression to speed up future lookups if values get multiply
// replaced with other values.
- RemapValue(I->second);
- N = I->second;
+ RemapId(I->second);
+ Id = I->second;
- // Note that it is possible to have N.getNode()->getNodeId() == NewNode at
- // this point because it is possible for a node to be put in the map before
- // being processed.
+ // Note that with N = IdToValueMap[Id] it is possible to have
+ // N.getNode()->getNodeId() == NewNode at this point because it is possible
+ // for a node to be put in the map before being processed.
}
}
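[Editor's note] RemapId is a union-find style lookup with path compression: once a chain of replacements has been followed, every id on the chain is rewritten to point directly at the final id, so later lookups are cheap. A stand-alone sketch of the same pattern over a plain hash map (types and names are illustrative):

    #include <unordered_map>

    using TableId = unsigned;

    // Follow the replacement chain for Id and compress the path so that every
    // entry visited now maps directly to the final id, as RemapId does above.
    static void remapId(std::unordered_map<TableId, TableId> &Replaced,
                        TableId &Id) {
      auto I = Replaced.find(Id);
      if (I == Replaced.end())
        return;                     // Id was never replaced
      remapId(Replaced, I->second); // walk to the end of the chain
      Id = I->second;               // point the caller straight at it
    }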
@@ -714,19 +640,22 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
assert(From.getNode() != To.getNode() && "Potential legalization loop!");
// If expansion produced new nodes, make sure they are properly marked.
- ExpungeNode(From.getNode());
- AnalyzeNewValue(To); // Expunges To.
+ AnalyzeNewValue(To);
// Anything that used the old node should now use the new one. Note that this
// can potentially cause recursive merging.
SmallSetVector<SDNode*, 16> NodesToAnalyze;
NodeUpdateListener NUL(*this, NodesToAnalyze);
do {
- DAG.ReplaceAllUsesOfValueWith(From, To);
- // The old node may still be present in a map like ExpandedIntegers or
- // PromotedIntegers. Inform maps about the replacement.
- ReplacedValues[From] = To;
+ // The old node may be present in a map like ExpandedIntegers or
+ // PromotedIntegers. Inform maps about the replacement.
+ auto FromId = getTableId(From);
+ auto ToId = getTableId(To);
+
+ if (FromId != ToId)
+ ReplacedValues[FromId] = ToId;
+ DAG.ReplaceAllUsesOfValueWith(From, To);
// Process the list of nodes that need to be reanalyzed.
while (!NodesToAnalyze.empty()) {
@@ -751,12 +680,15 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
SDValue NewVal(M, i);
if (M->getNodeId() == Processed)
RemapValue(NewVal);
- DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal);
// OldVal may be a target of the ReplacedValues map which was marked
// NewNode to force reanalysis because it was updated. Ensure that
// anything that ReplacedValues mapped to OldVal will now be mapped
// all the way to NewVal.
- ReplacedValues[OldVal] = NewVal;
+ auto OldValId = getTableId(OldVal);
+ auto NewValId = getTableId(NewVal);
+ DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal);
+ if (OldValId != NewValId)
+ ReplacedValues[OldValId] = NewValId;
}
// The original node continues to exist in the DAG, marked NewNode.
}
@@ -773,9 +705,11 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
"Invalid type for promoted integer");
AnalyzeNewValue(Result);
- SDValue &OpEntry = PromotedIntegers[Op];
- assert(!OpEntry.getNode() && "Node is already promoted!");
- OpEntry = Result;
+ auto &OpIdEntry = PromotedIntegers[getTableId(Op)];
+ assert((OpIdEntry == 0) && "Node is already promoted!");
+ OpIdEntry = getTableId(Result);
+
+ DAG.transferDbgValues(Op, Result);
}
void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
@@ -788,15 +722,15 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
"Invalid type for softened float");
AnalyzeNewValue(Result);
- SDValue &OpEntry = SoftenedFloats[Op];
+ auto &OpIdEntry = SoftenedFloats[getTableId(Op)];
// Allow repeated calls to save f128 type nodes
// or any node with type that transforms to itself.
// Many operations on these types are not softened.
- assert((!OpEntry.getNode()||
+ assert(((OpIdEntry == 0) ||
Op.getValueType() ==
- TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) &&
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) &&
"Node is already converted to integer!");
- OpEntry = Result;
+ OpIdEntry = getTableId(Result);
}
void DAGTypeLegalizer::SetPromotedFloat(SDValue Op, SDValue Result) {
@@ -805,9 +739,9 @@ void DAGTypeLegalizer::SetPromotedFloat(SDValue Op, SDValue Result) {
"Invalid type for promoted float");
AnalyzeNewValue(Result);
- SDValue &OpEntry = PromotedFloats[Op];
- assert(!OpEntry.getNode() && "Node is already promoted!");
- OpEntry = Result;
+ auto &OpIdEntry = PromotedFloats[getTableId(Op)];
+ assert((OpIdEntry == 0) && "Node is already promoted!");
+ OpIdEntry = getTableId(Result);
}
void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
@@ -818,19 +752,17 @@ void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
"Invalid type for scalarized vector");
AnalyzeNewValue(Result);
- SDValue &OpEntry = ScalarizedVectors[Op];
- assert(!OpEntry.getNode() && "Node is already scalarized!");
- OpEntry = Result;
+ auto &OpIdEntry = ScalarizedVectors[getTableId(Op)];
+ assert((OpIdEntry == 0) && "Node is already scalarized!");
+ OpIdEntry = getTableId(Result);
}
void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo,
SDValue &Hi) {
- std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op];
- RemapValue(Entry.first);
- RemapValue(Entry.second);
- assert(Entry.first.getNode() && "Operand isn't expanded");
- Lo = Entry.first;
- Hi = Entry.second;
+ std::pair<TableId, TableId> &Entry = ExpandedIntegers[getTableId(Op)];
+ assert((Entry.first != 0) && "Operand isn't expanded");
+ Lo = getSDValue(Entry.first);
+ Hi = getSDValue(Entry.second);
}
void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo,
@@ -856,20 +788,18 @@ void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo,
}
// Remember that this is the result of the node.
- std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op];
- assert(!Entry.first.getNode() && "Node already expanded");
- Entry.first = Lo;
- Entry.second = Hi;
+ std::pair<TableId, TableId> &Entry = ExpandedIntegers[getTableId(Op)];
+ assert((Entry.first == 0) && "Node already expanded");
+ Entry.first = getTableId(Lo);
+ Entry.second = getTableId(Hi);
}
void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo,
SDValue &Hi) {
- std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op];
- RemapValue(Entry.first);
- RemapValue(Entry.second);
- assert(Entry.first.getNode() && "Operand isn't expanded");
- Lo = Entry.first;
- Hi = Entry.second;
+ std::pair<TableId, TableId> &Entry = ExpandedFloats[getTableId(Op)];
+ assert((Entry.first != 0) && "Operand isn't expanded");
+ Lo = getSDValue(Entry.first);
+ Hi = getSDValue(Entry.second);
}
void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo,
@@ -882,21 +812,19 @@ void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo,
AnalyzeNewValue(Lo);
AnalyzeNewValue(Hi);
- // Remember that this is the result of the node.
- std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op];
- assert(!Entry.first.getNode() && "Node already expanded");
- Entry.first = Lo;
- Entry.second = Hi;
+ std::pair<TableId, TableId> &Entry = ExpandedFloats[getTableId(Op)];
+ assert((Entry.first == 0) && "Node already expanded");
+ Entry.first = getTableId(Lo);
+ Entry.second = getTableId(Hi);
}
void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo,
SDValue &Hi) {
- std::pair<SDValue, SDValue> &Entry = SplitVectors[Op];
- RemapValue(Entry.first);
- RemapValue(Entry.second);
- assert(Entry.first.getNode() && "Operand isn't split");
- Lo = Entry.first;
- Hi = Entry.second;
+ std::pair<TableId, TableId> &Entry = SplitVectors[getTableId(Op)];
+ Lo = getSDValue(Entry.first);
+ Hi = getSDValue(Entry.second);
+ assert(Lo.getNode() && "Operand isn't split");
}
void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo,
@@ -912,10 +840,10 @@ void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo,
AnalyzeNewValue(Hi);
// Remember that this is the result of the node.
- std::pair<SDValue, SDValue> &Entry = SplitVectors[Op];
- assert(!Entry.first.getNode() && "Node already split");
- Entry.first = Lo;
- Entry.second = Hi;
+ std::pair<TableId, TableId> &Entry = SplitVectors[getTableId(Op)];
+ assert((Entry.first == 0) && "Node already split");
+ Entry.first = getTableId(Lo);
+ Entry.second = getTableId(Hi);
}
void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
@@ -924,9 +852,9 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
"Invalid type for widened vector");
AnalyzeNewValue(Result);
- SDValue &OpEntry = WidenedVectors[Op];
- assert(!OpEntry.getNode() && "Node already widened!");
- OpEntry = Result;
+ auto &OpIdEntry = WidenedVectors[getTableId(Op)];
+ assert((OpIdEntry == 0) && "Node already widened!");
+ OpIdEntry = getTableId(Result);
}
@@ -1064,11 +992,11 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {
EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
LVT.getSizeInBits() + HVT.getSizeInBits());
+ EVT ShiftAmtVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout(), false);
Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo);
Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi);
Hi = DAG.getNode(ISD::SHL, dlHi, NVT, Hi,
- DAG.getConstant(LVT.getSizeInBits(), dlHi,
- TLI.getPointerTy(DAG.getDataLayout())));
+ DAG.getConstant(LVT.getSizeInBits(), dlHi, ShiftAmtVT));
return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi);
}
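[Editor's note] JoinIntegers assembles the wide integer as zext(Lo) | (anyext(Hi) << bits(Lo)); the change above only makes the shift-amount type explicit instead of borrowing the pointer type. The same arithmetic on concrete scalar widths, as an illustration:

    #include <cstdint>

    // Join two 32-bit halves into one 64-bit value the way JoinIntegers does:
    // zero-extend the low half, extend and shift the high half, then OR them.
    static uint64_t joinHalves(uint32_t Lo, uint32_t Hi) {
      uint64_t WideLo = static_cast<uint64_t>(Lo);       // ZERO_EXTEND
      uint64_t WideHi = static_cast<uint64_t>(Hi) << 32; // ANY_EXTEND + SHL by bits(Lo)
      return WideLo | WideHi;                            // OR
    }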
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 64cb80e0d853..2c6b1ee7900f 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -93,46 +93,81 @@ private:
N->getOpcode() == ISD::Register;
}
+ // Bijection from SDValue to unique id. As each created node gets a
+ // new id, we do not need to worry about expunging stale entries when a
+ // node is reused. Should we run out of ids, we can do a one-time
+ // expensive compactification.
+ typedef unsigned TableId;
+
+ TableId NextValueId = 1;
+
+ SmallDenseMap<SDValue, TableId, 8> ValueToIdMap;
+ SmallDenseMap<TableId, SDValue, 8> IdToValueMap;
+
/// For integer nodes that are below legal width, this map indicates what
/// promoted value to use.
- SmallDenseMap<SDValue, SDValue, 8> PromotedIntegers;
+ SmallDenseMap<TableId, TableId, 8> PromotedIntegers;
/// For integer nodes that need to be expanded this map indicates which
/// operands are the expanded version of the input.
- SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedIntegers;
+ SmallDenseMap<TableId, std::pair<TableId, TableId>, 8> ExpandedIntegers;
/// For floating-point nodes converted to integers of the same size, this map
/// indicates the converted value to use.
- SmallDenseMap<SDValue, SDValue, 8> SoftenedFloats;
+ SmallDenseMap<TableId, TableId, 8> SoftenedFloats;
/// For floating-point nodes that have a smaller precision than the smallest
/// supported precision, this map indicates what promoted value to use.
- SmallDenseMap<SDValue, SDValue, 8> PromotedFloats;
+ SmallDenseMap<TableId, TableId, 8> PromotedFloats;
/// For float nodes that need to be expanded this map indicates which operands
/// are the expanded version of the input.
- SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedFloats;
+ SmallDenseMap<TableId, std::pair<TableId, TableId>, 8> ExpandedFloats;
/// For nodes that are <1 x ty>, this map indicates the scalar value of type
/// 'ty' to use.
- SmallDenseMap<SDValue, SDValue, 8> ScalarizedVectors;
+ SmallDenseMap<TableId, TableId, 8> ScalarizedVectors;
/// For nodes that need to be split this map indicates which operands are the
/// expanded version of the input.
- SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> SplitVectors;
+ SmallDenseMap<TableId, std::pair<TableId, TableId>, 8> SplitVectors;
/// For vector nodes that need to be widened, indicates the widened value to
/// use.
- SmallDenseMap<SDValue, SDValue, 8> WidenedVectors;
+ SmallDenseMap<TableId, TableId, 8> WidenedVectors;
/// For values that have been replaced with another, indicates the replacement
/// value to use.
- SmallDenseMap<SDValue, SDValue, 8> ReplacedValues;
+ SmallDenseMap<TableId, TableId, 8> ReplacedValues;
/// This defines a worklist of nodes to process. In order to be pushed onto
/// this worklist, all operands of a node must have already been processed.
SmallVector<SDNode*, 128> Worklist;
+ TableId getTableId(SDValue V) {
+ assert(V.getNode() && "Getting TableId on SDValue()");
+
+ auto I = ValueToIdMap.find(V);
+ if (I != ValueToIdMap.end()) {
+ // Remap the id in case it has since been replaced.
+ RemapId(I->second);
+ assert(I->second && "All Ids should be nonzero");
+ return I->second;
+ }
+ // Add if it's not there.
+ ValueToIdMap.insert(std::make_pair(V, NextValueId));
+ IdToValueMap.insert(std::make_pair(NextValueId, V));
+ ++NextValueId;
+ assert(NextValueId != 0 &&
+ "Ran out of Ids. Increase id type size or add compactification");
+ return NextValueId - 1;
+ }
+
+ const SDValue &getSDValue(TableId &Id) {
+ RemapId(Id);
+ assert(Id && "TableId should be non-zero");
+ return IdToValueMap[Id];
+ }
+
public:
explicit DAGTypeLegalizer(SelectionDAG &dag)
: TLI(dag.getTargetLoweringInfo()), DAG(dag),
@@ -147,10 +182,25 @@ public:
bool run();
void NoteDeletion(SDNode *Old, SDNode *New) {
- ExpungeNode(Old);
- ExpungeNode(New);
- for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i)
- ReplacedValues[SDValue(Old, i)] = SDValue(New, i);
+ for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) {
+ TableId NewId = getTableId(SDValue(New, i));
+ TableId OldId = getTableId(SDValue(Old, i));
+
+ if (OldId != NewId)
+ ReplacedValues[OldId] = NewId;
+
+ // Delete Node from tables.
+ ValueToIdMap.erase(SDValue(Old, i));
+ IdToValueMap.erase(OldId);
+ PromotedIntegers.erase(OldId);
+ ExpandedIntegers.erase(OldId);
+ SoftenedFloats.erase(OldId);
+ PromotedFloats.erase(OldId);
+ ExpandedFloats.erase(OldId);
+ ScalarizedVectors.erase(OldId);
+ SplitVectors.erase(OldId);
+ WidenedVectors.erase(OldId);
+ }
}
SelectionDAG &getDAG() const { return DAG; }
@@ -158,9 +208,9 @@ public:
private:
SDNode *AnalyzeNewNode(SDNode *N);
void AnalyzeNewValue(SDValue &Val);
- void ExpungeNode(SDNode *N);
void PerformExpensiveChecks();
- void RemapValue(SDValue &N);
+ void RemapId(TableId &Id);
+ void RemapValue(SDValue &V);
// Common routines.
SDValue BitConvertToInteger(SDValue Op);
@@ -207,8 +257,8 @@ private:
/// returns an i32, the lower 16 bits of which coincide with Op, and the upper
/// 16 bits of which contain rubbish.
SDValue GetPromotedInteger(SDValue Op) {
- SDValue &PromotedOp = PromotedIntegers[Op];
- RemapValue(PromotedOp);
+ TableId &PromotedId = PromotedIntegers[getTableId(Op)];
+ SDValue PromotedOp = getSDValue(PromotedId);
assert(PromotedOp.getNode() && "Operand wasn't promoted?");
return PromotedOp;
}
@@ -282,7 +332,7 @@ private:
SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
// Integer Operand Promotion.
- bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo);
+ bool PromoteIntegerOperand(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_ANY_EXTEND(SDNode *N);
SDValue PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N);
SDValue PromoteIntOp_BITCAST(SDNode *N);
@@ -373,11 +423,10 @@ private:
bool ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
// Integer Operand Expansion.
- bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo);
+ bool ExpandIntegerOperand(SDNode *N, unsigned OpNo);
SDValue ExpandIntOp_BR_CC(SDNode *N);
SDValue ExpandIntOp_SELECT_CC(SDNode *N);
SDValue ExpandIntOp_SETCC(SDNode *N);
- SDValue ExpandIntOp_SETCCE(SDNode *N);
SDValue ExpandIntOp_SETCCCARRY(SDNode *N);
SDValue ExpandIntOp_Shift(SDNode *N);
SDValue ExpandIntOp_SINT_TO_FP(SDNode *N);
@@ -403,16 +452,15 @@ private:
/// stay in a register, the Op is not converted to an integer.
/// In that case, the given op is returned.
SDValue GetSoftenedFloat(SDValue Op) {
- auto Iter = SoftenedFloats.find(Op);
+ TableId Id = getTableId(Op);
+ auto Iter = SoftenedFloats.find(Id);
if (Iter == SoftenedFloats.end()) {
assert(isSimpleLegalType(Op.getValueType()) &&
"Operand wasn't converted to integer?");
return Op;
}
-
- SDValue &SoftenedOp = Iter->second;
+ SDValue SoftenedOp = getSDValue(Iter->second);
assert(SoftenedOp.getNode() && "Unconverted op in SoftenedFloats?");
- RemapValue(SoftenedOp);
return SoftenedOp;
}
void SetSoftenedFloat(SDValue Op, SDValue Result);
@@ -531,7 +579,7 @@ private:
void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi);
// Float Operand Expansion.
- bool ExpandFloatOperand(SDNode *N, unsigned OperandNo);
+ bool ExpandFloatOperand(SDNode *N, unsigned OpNo);
SDValue ExpandFloatOp_BR_CC(SDNode *N);
SDValue ExpandFloatOp_FCOPYSIGN(SDNode *N);
SDValue ExpandFloatOp_FP_ROUND(SDNode *N);
@@ -549,8 +597,8 @@ private:
//===--------------------------------------------------------------------===//
SDValue GetPromotedFloat(SDValue Op) {
- SDValue &PromotedOp = PromotedFloats[Op];
- RemapValue(PromotedOp);
+ TableId &PromotedId = PromotedFloats[getTableId(Op)];
+ SDValue PromotedOp = getSDValue(PromotedId);
assert(PromotedOp.getNode() && "Operand wasn't promoted?");
return PromotedOp;
}
@@ -572,7 +620,7 @@ private:
SDValue PromoteFloatRes_UNDEF(SDNode *N);
SDValue PromoteFloatRes_XINT_TO_FP(SDNode *N);
- bool PromoteFloatOperand(SDNode *N, unsigned ResNo);
+ bool PromoteFloatOperand(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo);
@@ -589,15 +637,15 @@ private:
/// element type, this returns the element. For example, if Op is a v1i32,
/// Op = < i32 val >, this method returns val, an i32.
SDValue GetScalarizedVector(SDValue Op) {
- SDValue &ScalarizedOp = ScalarizedVectors[Op];
- RemapValue(ScalarizedOp);
+ TableId &ScalarizedId = ScalarizedVectors[getTableId(Op)];
+ SDValue ScalarizedOp = getSDValue(ScalarizedId);
assert(ScalarizedOp.getNode() && "Operand wasn't scalarized?");
return ScalarizedOp;
}
void SetScalarizedVector(SDValue Op, SDValue Result);
// Vector Result Scalarization: <1 x ty> -> ty.
- void ScalarizeVectorResult(SDNode *N, unsigned OpNo);
+ void ScalarizeVectorResult(SDNode *N, unsigned ResNo);
SDValue ScalarizeVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
SDValue ScalarizeVecRes_BinOp(SDNode *N);
SDValue ScalarizeVecRes_TernaryOp(SDNode *N);
@@ -646,13 +694,14 @@ private:
void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi);
// Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>.
- void SplitVectorResult(SDNode *N, unsigned OpNo);
+ void SplitVectorResult(SDNode *N, unsigned ResNo);
void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -662,9 +711,9 @@ private:
void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
- void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi);
- void SplitVecRes_MLOAD(MaskedLoadSDNode *N, SDValue &Lo, SDValue &Hi);
- void SplitVecRes_MGATHER(MaskedGatherSDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
@@ -684,7 +733,7 @@ private:
SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);
- SDValue SplitVecOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, unsigned OpNo);
SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
SDValue SplitVecOp_VSETCC(SDNode *N);
SDValue SplitVecOp_FP_ROUND(SDNode *N);
@@ -701,8 +750,8 @@ private:
/// method returns a v4i32 for which the first two elements are the same as
/// those of Op, while the last two elements contain rubbish.
SDValue GetWidenedVector(SDValue Op) {
- SDValue &WidenedOp = WidenedVectors[Op];
- RemapValue(WidenedOp);
+ TableId &WidenedId = WidenedVectors[getTableId(Op)];
+ SDValue WidenedOp = getSDValue(WidenedId);
assert(WidenedOp.getNode() && "Operand wasn't widened?");
return WidenedOp;
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 993465ae9dc2..df3134828af5 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -300,6 +300,7 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), Align);
Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2), 0);
+ Chain = Hi.getValue(1);
// Handle endianness of the load.
if (TLI.hasBigEndianPartOrdering(OVT, DAG.getDataLayout()))
@@ -307,7 +308,7 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Modified the chain - switch anything that used the old chain to use
// the new one.
- ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+ ReplaceValueWith(SDValue(N, 1), Chain);
}
@@ -384,7 +385,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
// Build a vector of twice the length out of the expanded elements.
// For example <3 x i64> -> <6 x i32>.
- std::vector<SDValue> NewElts;
+ SmallVector<SDValue, 16> NewElts;
NewElts.reserve(NumElts*2);
for (unsigned i = 0; i < NumElts; ++i) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 6a141818bb6d..67928d4bdbd5 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -32,7 +32,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -41,6 +40,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
@@ -63,7 +63,7 @@ class VectorLegalizer {
/// legalizing the same thing more than once.
SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;
- /// \brief Adds a node to the translation cache.
+ /// Adds a node to the translation cache.
void AddLegalizedOperand(SDValue From, SDValue To) {
LegalizedNodes.insert(std::make_pair(From, To));
// If someone requests legalization of the new node, return itself.
@@ -71,55 +71,55 @@ class VectorLegalizer {
LegalizedNodes.insert(std::make_pair(To, To));
}
- /// \brief Legalizes the given node.
+ /// Legalizes the given node.
SDValue LegalizeOp(SDValue Op);
- /// \brief Assuming the node is legal, "legalize" the results.
+ /// Assuming the node is legal, "legalize" the results.
SDValue TranslateLegalizeResults(SDValue Op, SDValue Result);
- /// \brief Implements unrolling a VSETCC.
+ /// Implements unrolling a VSETCC.
SDValue UnrollVSETCC(SDValue Op);
- /// \brief Implement expand-based legalization of vector operations.
+ /// Implement expand-based legalization of vector operations.
///
/// This is just a high-level routine to dispatch to specific code paths for
/// operations to legalize them.
SDValue Expand(SDValue Op);
- /// \brief Implements expansion for FNEG; falls back to UnrollVectorOp if
+ /// Implements expansion for FNEG; falls back to UnrollVectorOp if
/// FSUB isn't legal.
///
/// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
/// SINT_TO_FLOAT and SHR on vectors isn't legal.
SDValue ExpandUINT_TO_FLOAT(SDValue Op);
- /// \brief Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
+ /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
SDValue ExpandSEXTINREG(SDValue Op);
- /// \brief Implement expansion for ANY_EXTEND_VECTOR_INREG.
+ /// Implement expansion for ANY_EXTEND_VECTOR_INREG.
///
/// Shuffles the low lanes of the operand into place and bitcasts to the proper
/// type. The contents of the bits in the extended part of each element are
/// undef.
SDValue ExpandANY_EXTEND_VECTOR_INREG(SDValue Op);
- /// \brief Implement expansion for SIGN_EXTEND_VECTOR_INREG.
+ /// Implement expansion for SIGN_EXTEND_VECTOR_INREG.
///
/// Shuffles the low lanes of the operand into place, bitcasts to the proper
/// type, then shifts left and arithmetic shifts right to introduce a sign
/// extension.
SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op);
- /// \brief Implement expansion for ZERO_EXTEND_VECTOR_INREG.
+ /// Implement expansion for ZERO_EXTEND_VECTOR_INREG.
///
/// Shuffles the low lanes of the operand into place and blends zeros into
/// the remaining lanes, finally bitcasting to the proper type.
SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op);
- /// \brief Expand bswap of vectors into a shuffle if legal.
+ /// Expand bswap of vectors into a shuffle if legal.
SDValue ExpandBSWAP(SDValue Op);
- /// \brief Implement vselect in terms of XOR, AND, OR when blend is not
+ /// Implement vselect in terms of XOR, AND, OR when blend is not
/// supported by the target.
SDValue ExpandVSELECT(SDValue Op);
SDValue ExpandSELECT(SDValue Op);
@@ -130,19 +130,20 @@ class VectorLegalizer {
SDValue ExpandBITREVERSE(SDValue Op);
SDValue ExpandCTLZ(SDValue Op);
SDValue ExpandCTTZ_ZERO_UNDEF(SDValue Op);
-
- /// \brief Implements vector promotion.
+ SDValue ExpandStrictFPOp(SDValue Op);
+
+ /// Implements vector promotion.
///
/// This is essentially just bitcasting the operands to a different type and
/// bitcasting the result back to the original type.
SDValue Promote(SDValue Op);
- /// \brief Implements [SU]INT_TO_FP vector promotion.
+ /// Implements [SU]INT_TO_FP vector promotion.
///
/// This is a [zs]ext of the input operand to a larger integer type.
SDValue PromoteINT_TO_FP(SDValue Op);
- /// \brief Implements FP_TO_[SU]INT vector promotion of the result type.
+ /// Implements FP_TO_[SU]INT vector promotion of the result type.
///
/// It is promoted to a larger integer type. The result is then
/// truncated back to the original type.
@@ -152,7 +153,7 @@ public:
VectorLegalizer(SelectionDAG& dag) :
DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
- /// \brief Begin legalizer the vector operations in the DAG.
+ /// Begin legalizing the vector operations in the DAG.
bool Run();
};
@@ -222,14 +223,16 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
for (const SDValue &Op : Node->op_values())
Ops.push_back(LegalizeOp(Op));
- SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops), 0);
+ SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops),
+ Op.getResNo());
bool HasVectorValue = false;
if (Op.getOpcode() == ISD::LOAD) {
LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
ISD::LoadExtType ExtType = LD->getExtensionType();
if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) {
- DEBUG(dbgs() << "\nLegalizing extending vector load: "; Node->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "\nLegalizing extending vector load: ";
+ Node->dump(&DAG));
switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0),
LD->getMemoryVT())) {
default: llvm_unreachable("This action is not supported yet!");
@@ -261,8 +264,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
EVT StVT = ST->getMemoryVT();
MVT ValVT = ST->getValue().getSimpleValueType();
if (StVT.isVector() && ST->isTruncatingStore()) {
- DEBUG(dbgs() << "\nLegalizing truncating vector store: ";
- Node->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "\nLegalizing truncating vector store: ";
+ Node->dump(&DAG));
switch (TLI.getTruncStoreAction(ValVT, StVT)) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
@@ -287,10 +290,34 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
if (!HasVectorValue)
return TranslateLegalizeResults(Op, Result);
- EVT QueryType;
+ TargetLowering::LegalizeAction Action = TargetLowering::Legal;
switch (Op.getOpcode()) {
default:
return TranslateLegalizeResults(Op, Result);
+ case ISD::STRICT_FADD:
+ case ISD::STRICT_FSUB:
+ case ISD::STRICT_FMUL:
+ case ISD::STRICT_FDIV:
+ case ISD::STRICT_FSQRT:
+ case ISD::STRICT_FMA:
+ case ISD::STRICT_FPOW:
+ case ISD::STRICT_FPOWI:
+ case ISD::STRICT_FSIN:
+ case ISD::STRICT_FCOS:
+ case ISD::STRICT_FEXP:
+ case ISD::STRICT_FEXP2:
+ case ISD::STRICT_FLOG:
+ case ISD::STRICT_FLOG10:
+ case ISD::STRICT_FLOG2:
+ case ISD::STRICT_FRINT:
+ case ISD::STRICT_FNEARBYINT:
+ // These pseudo-ops get legalized as if they were their non-strict
+ // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
+ // is also legal, but if ISD::FSQRT requires expansion then so does
+ // ISD::STRICT_FSQRT.
+ Action = TLI.getStrictFPOperationAction(Node->getOpcode(),
+ Node->getValueType(0));
+ break;
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
@@ -366,42 +393,47 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::UMAX:
case ISD::SMUL_LOHI:
case ISD::UMUL_LOHI:
- QueryType = Node->getValueType(0);
+ case ISD::FCANONICALIZE:
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
break;
case ISD::FP_ROUND_INREG:
- QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ cast<VTSDNode>(Node->getOperand(1))->getVT());
break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
- QueryType = Node->getOperand(0).getValueType();
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(0).getValueType());
break;
case ISD::MSCATTER:
- QueryType = cast<MaskedScatterSDNode>(Node)->getValue().getValueType();
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ cast<MaskedScatterSDNode>(Node)->getValue().getValueType());
break;
case ISD::MSTORE:
- QueryType = cast<MaskedStoreSDNode>(Node)->getValue().getValueType();
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ cast<MaskedStoreSDNode>(Node)->getValue().getValueType());
break;
}
- DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
- switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) {
+ switch (Action) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Promote:
Result = Promote(Op);
Changed = true;
break;
case TargetLowering::Legal:
- DEBUG(dbgs() << "Legal node: nothing to do\n");
+ LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
break;
case TargetLowering::Custom: {
- DEBUG(dbgs() << "Trying custom legalization\n");
+ LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) {
- DEBUG(dbgs() << "Successfully custom legalized node\n");
+ LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n");
Result = Tmp1;
break;
}
- DEBUG(dbgs() << "Could not custom legalize node\n");
+ LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
LLVM_FALLTHROUGH;
}
case TargetLowering::Expand:
@@ -649,9 +681,14 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
Value = DAG.getBuildVector(Op.getNode()->getValueType(0), dl, Vals);
} else {
SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG);
-
- NewChain = Scalarized.getValue(1);
- Value = Scalarized.getValue(0);
+ // Skip past the MERGE_VALUES node if one was produced.
+ if (Scalarized->getOpcode() == ISD::MERGE_VALUES) {
+ NewChain = Scalarized.getOperand(1);
+ Value = Scalarized.getOperand(0);
+ } else {
+ NewChain = Scalarized.getValue(1);
+ Value = Scalarized.getValue(0);
+ }
}
AddLegalizedOperand(Op.getValue(0), Value);
@@ -662,35 +699,6 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
SDValue VectorLegalizer::ExpandStore(SDValue Op) {
StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
-
- EVT StVT = ST->getMemoryVT();
- EVT MemSclVT = StVT.getScalarType();
- unsigned ScalarSize = MemSclVT.getSizeInBits();
-
- // Round odd types to the next pow of two.
- if (!isPowerOf2_32(ScalarSize)) {
- // FIXME: This is completely broken and inconsistent with ExpandLoad
- // handling.
-
- // For sub-byte element sizes, this ends up with 0 stride between elements,
- // so the same element just gets re-written to the same location. There seem
- // to be tests explicitly testing for this broken behavior though. tests
- // for this broken behavior.
-
- LLVMContext &Ctx = *DAG.getContext();
-
- EVT NewMemVT
- = EVT::getVectorVT(Ctx,
- MemSclVT.getIntegerVT(Ctx, NextPowerOf2(ScalarSize)),
- StVT.getVectorNumElements());
-
- SDValue NewVectorStore = DAG.getTruncStore(
- ST->getChain(), SDLoc(Op), ST->getValue(), ST->getBasePtr(),
- ST->getPointerInfo(), NewMemVT, ST->getAlignment(),
- ST->getMemOperand()->getFlags(), ST->getAAInfo());
- ST = cast<StoreSDNode>(NewVectorStore.getNode());
- }
-
SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
AddLegalizedOperand(Op, TF);
return TF;
@@ -727,6 +735,24 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
return ExpandCTLZ(Op);
case ISD::CTTZ_ZERO_UNDEF:
return ExpandCTTZ_ZERO_UNDEF(Op);
+ case ISD::STRICT_FADD:
+ case ISD::STRICT_FSUB:
+ case ISD::STRICT_FMUL:
+ case ISD::STRICT_FDIV:
+ case ISD::STRICT_FSQRT:
+ case ISD::STRICT_FMA:
+ case ISD::STRICT_FPOW:
+ case ISD::STRICT_FPOWI:
+ case ISD::STRICT_FSIN:
+ case ISD::STRICT_FCOS:
+ case ISD::STRICT_FEXP:
+ case ISD::STRICT_FEXP2:
+ case ISD::STRICT_FLOG:
+ case ISD::STRICT_FLOG10:
+ case ISD::STRICT_FLOG2:
+ case ISD::STRICT_FRINT:
+ case ISD::STRICT_FNEARBYINT:
+ return ExpandStrictFPOp(Op);
default:
return DAG.UnrollVectorOp(Op.getNode());
}
@@ -1020,7 +1046,7 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT);
// Two to the power of half-word-size.
- SDValue TWOHW = DAG.getConstantFP(1 << (BW / 2), DL, Op.getValueType());
+ SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, Op.getValueType());
// Clear upper part of LO, lower HI
SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord);
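The switch to 1ULL matters once the integer elements are 64 bits wide: BW / 2 is then 32, and shifting a plain int left by 32 is undefined behaviour, so the 2^32 scaling constant used to recombine the two halves would be wrong. A small standalone sketch of the arithmetic (plain C++, not SelectionDAG code; the concrete values are made up for illustration):

    #include <cstdint>
    #include <cstdio>

    int main() {
      unsigned BW = 64;                    // scalar bit width of the integer vector
      // 1 << 32 overflows a 32-bit int; the unsigned long long literal keeps
      // the shift in a 64-bit type.
      uint64_t TwoHW = 1ULL << (BW / 2);   // 4294967296, i.e. 2^32
      // The expansion converts the two halves separately and recombines them
      // as hi * 2^32 + lo in floating point.
      uint64_t Hi = 0x9ABCDEF0u, Lo = 0x12345678u;
      double Converted = (double)Hi * (double)TwoHW + (double)Lo;
      std::printf("TwoHW = %llu, converted = %.1f\n",
                  (unsigned long long)TwoHW, Converted);
      return 0;
    }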
@@ -1113,6 +1139,53 @@ SDValue VectorLegalizer::ExpandCTTZ_ZERO_UNDEF(SDValue Op) {
return DAG.UnrollVectorOp(Op.getNode());
}
+SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
+ EVT VT = Op.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned NumOpers = Op.getNumOperands();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT ValueVTs[] = {EltVT, MVT::Other};
+ SDValue Chain = Op.getOperand(0);
+ SDLoc dl(Op);
+
+ SmallVector<SDValue, 32> OpValues;
+ SmallVector<SDValue, 32> OpChains;
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SmallVector<SDValue, 4> Opers;
+ SDValue Idx = DAG.getConstant(i, dl,
+ TLI.getVectorIdxTy(DAG.getDataLayout()));
+
+ // The Chain is the first operand.
+ Opers.push_back(Chain);
+
+ // Now process the remaining operands.
+ for (unsigned j = 1; j < NumOpers; ++j) {
+ SDValue Oper = Op.getOperand(j);
+ EVT OperVT = Oper.getValueType();
+
+ if (OperVT.isVector())
+ Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ EltVT, Oper, Idx);
+
+ Opers.push_back(Oper);
+ }
+
+ SDValue ScalarOp = DAG.getNode(Op->getOpcode(), dl, ValueVTs, Opers);
+
+ OpValues.push_back(ScalarOp.getValue(0));
+ OpChains.push_back(ScalarOp.getValue(1));
+ }
+
+ SDValue Result = DAG.getBuildVector(VT, dl, OpValues);
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
+
+ AddLegalizedOperand(Op.getValue(0), Result);
+ AddLegalizedOperand(Op.getValue(1), NewChain);
+
+ return NewChain;
+}
+
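Unlike the plain UnrollVectorOp fallback, ExpandStrictFPOp above has to thread a chain through every lane: each scalar STRICT_* node is built with {EltVT, MVT::Other} results, and the per-lane chains are then merged with a TokenFactor so ordering against other side-effecting nodes is preserved. A minimal standalone model of that bookkeeping for a two-lane strict add (plain C++ stand-ins, not SelectionDAG types):

    #include <cstdio>
    #include <vector>

    struct Chain { int id; };                       // stand-in for an MVT::Other result
    struct ScalarRes { double val; Chain chain; };  // value + chain, like {EltVT, Other}
    static int NextId = 1;

    // Stand-in for a scalar STRICT_FADD: ordered after the incoming chain and
    // producing its own outgoing chain.
    static ScalarRes strictAdd(Chain in, double a, double b) {
      (void)in;
      return {a + b, Chain{NextId++}};
    }

    // Stand-in for ISD::TokenFactor: one chain that depends on all inputs.
    static Chain tokenFactor(const std::vector<Chain> &cs) {
      (void)cs;
      return Chain{NextId++};
    }

    int main() {
      Chain entry{0};
      double lhs[] = {1.0, 2.0}, rhs[] = {0.5, 0.25};
      std::vector<double> lanes;        // plays the role of the BUILD_VECTOR
      std::vector<Chain> laneChains;
      for (int i = 0; i < 2; ++i) {     // one scalar strict op per lane
        ScalarRes r = strictAdd(entry, lhs[i], rhs[i]);
        lanes.push_back(r.val);
        laneChains.push_back(r.chain);
      }
      Chain out = tokenFactor(laneChains);
      std::printf("lanes: %g %g, merged chain id: %d\n", lanes[0], lanes[1], out.id);
      return 0;
    }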
SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
EVT VT = Op.getValueType();
unsigned NumElems = VT.getVectorNumElements();
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index df1cbeb92740..1cd43ace48f3 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -33,9 +33,8 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
- DEBUG(dbgs() << "Scalarize node result " << ResNo << ": ";
- N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Scalarize node result " << ResNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
SDValue R = SDValue();
switch (N->getOpcode()) {
@@ -169,9 +168,14 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N,
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) {
+ SDValue Op = N->getOperand(0);
+ if (Op.getValueType().isVector()
+ && Op.getValueType().getVectorNumElements() == 1
+ && !isSimpleLegalType(Op.getValueType()))
+ Op = GetScalarizedVector(Op);
EVT NewVT = N->getValueType(0).getVectorElementType();
return DAG.getNode(ISD::BITCAST, SDLoc(N),
- NewVT, N->getOperand(0));
+ NewVT, Op);
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) {
@@ -338,8 +342,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
ScalarBool = TargetLowering::UndefinedBooleanContent;
}
+ EVT CondVT = Cond.getValueType();
if (ScalarBool != VecBool) {
- EVT CondVT = Cond.getValueType();
switch (ScalarBool) {
case TargetLowering::UndefinedBooleanContent:
break;
@@ -360,6 +364,11 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
}
}
+ // Truncate the condition if needed
+ auto BoolVT = getSetCCResultType(CondVT);
+ if (BoolVT.bitsLT(CondVT))
+ Cond = DAG.getNode(ISD::TRUNCATE, SDLoc(N), BoolVT, Cond);
+
return DAG.getSelect(SDLoc(N),
LHS.getValueType(), Cond, LHS,
GetScalarizedVector(N->getOperand(2)));
@@ -433,9 +442,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) {
//===----------------------------------------------------------------------===//
bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
- DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": ";
- N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
SDValue Res = SDValue();
if (!Res.getNode()) {
@@ -515,7 +523,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) {
N->getValueType(0).getScalarType(), Elt);
// Revectorize the result so the types line up with what the uses of this
// expression expect.
- return DAG.getBuildVector(N->getValueType(0), SDLoc(N), Op);
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Op);
}
/// The vectors to concatenate have length one - use a BUILD_VECTOR instead.
@@ -618,9 +626,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) {
/// invalid operands or may have other results that need legalization, we just
/// know that (at least) one result needs vector splitting.
void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
- DEBUG(dbgs() << "Split node result: ";
- N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Split node result: "; N->dump(&DAG); dbgs() << "\n");
SDValue Lo, Hi;
// See if the target wants to custom expand this node.
@@ -749,6 +755,25 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FMA:
SplitVecRes_TernaryOp(N, Lo, Hi);
break;
+ case ISD::STRICT_FADD:
+ case ISD::STRICT_FSUB:
+ case ISD::STRICT_FMUL:
+ case ISD::STRICT_FDIV:
+ case ISD::STRICT_FSQRT:
+ case ISD::STRICT_FMA:
+ case ISD::STRICT_FPOW:
+ case ISD::STRICT_FPOWI:
+ case ISD::STRICT_FSIN:
+ case ISD::STRICT_FCOS:
+ case ISD::STRICT_FEXP:
+ case ISD::STRICT_FEXP2:
+ case ISD::STRICT_FLOG:
+ case ISD::STRICT_FLOG10:
+ case ISD::STRICT_FLOG2:
+ case ISD::STRICT_FRINT:
+ case ISD::STRICT_FNEARBYINT:
+ SplitVecRes_StrictFPOp(N, Lo, Hi);
+ break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -1028,6 +1053,56 @@ void DAGTypeLegalizer::SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(Opcode, dl, OutHiVT, InHi);
}
+void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ unsigned NumOps = N->getNumOperands();
+ SDValue Chain = N->getOperand(0);
+ EVT LoVT, HiVT;
+ SDLoc dl(N);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+ SmallVector<SDValue, 4> OpsLo;
+ SmallVector<SDValue, 4> OpsHi;
+
+ // The Chain is the first operand.
+ OpsLo.push_back(Chain);
+ OpsHi.push_back(Chain);
+
+ // Now process the remaining operands.
+ for (unsigned i = 1; i < NumOps; ++i) {
+ SDValue Op = N->getOperand(i);
+ SDValue OpLo = Op;
+ SDValue OpHi = Op;
+
+ EVT InVT = Op.getValueType();
+ if (InVT.isVector()) {
+ // If the input also splits, handle it directly for a
+ // compile time speedup. Otherwise split it by hand.
+ if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Op, OpLo, OpHi);
+ else
+ std::tie(OpLo, OpHi) = DAG.SplitVectorOperand(N, i);
+ }
+
+ OpsLo.push_back(OpLo);
+ OpsHi.push_back(OpHi);
+ }
+
+ EVT LoValueVTs[] = {LoVT, MVT::Other};
+ EVT HiValueVTs[] = {HiVT, MVT::Other};
+ Lo = DAG.getNode(N->getOpcode(), dl, LoValueVTs, OpsLo);
+ Hi = DAG.getNode(N->getOpcode(), dl, HiValueVTs, OpsHi);
+
+ // Build a factor node to remember that this Op is independent of the
+ // other one.
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Lo.getValue(1), Hi.getValue(1));
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Chain);
+}
+
void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Vec = N->getOperand(0);
@@ -1200,16 +1275,16 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG,
MLD->isExpandingLoad());
+ unsigned HiOffset = LoMemVT.getStoreSize();
- MMO = DAG.getMachineFunction().
- getMachineMemOperand(MLD->getPointerInfo(),
- MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
- SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
+ MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MLD->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOLoad,
+ HiMemVT.getStoreSize(), SecondHalfAlignment, MLD->getAAInfo(),
+ MLD->getRanges());
Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
ExtType, MLD->isExpandingLoad());
-
// Build a factor node to remember that this load is independent of the
// other one.
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
@@ -1232,6 +1307,7 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
SDValue Mask = MGT->getMask();
SDValue Src0 = MGT->getValue();
SDValue Index = MGT->getIndex();
+ SDValue Scale = MGT->getScale();
unsigned Alignment = MGT->getOriginalAlignment();
// Split Mask operand
@@ -1263,11 +1339,11 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MGT->getAAInfo(), MGT->getRanges());
- SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo};
+ SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo, Scale};
Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo,
MMO);
- SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi};
+ SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi, Scale};
Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi,
MMO);
@@ -1365,8 +1441,8 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo,
std::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT);
if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) &&
TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) {
- DEBUG(dbgs() << "Split vector extend via incremental extend:";
- N->dump(&DAG); dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Split vector extend via incremental extend:";
+ N->dump(&DAG); dbgs() << "\n");
// Extend the source vector by one step.
SDValue NewSrc =
DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0));
@@ -1501,9 +1577,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
/// the node are known to be legal, but other operands of the node may need
/// legalization as well as the specified one.
bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
- DEBUG(dbgs() << "Split node operand: ";
- N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Split node operand: "; N->dump(&DAG); dbgs() << "\n");
SDValue Res = SDValue();
// See if the target wants to custom split this node.
@@ -1683,8 +1757,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo) {
// Use the appropriate scalar instruction on the split subvectors before
// reducing the now partially reduced smaller vector.
- SDValue Partial = DAG.getNode(CombineOpc, dl, LoOpVT, Lo, Hi);
- return DAG.getNode(N->getOpcode(), dl, ResVT, Partial);
+ SDValue Partial = DAG.getNode(CombineOpc, dl, LoOpVT, Lo, Hi, N->getFlags());
+ return DAG.getNode(N->getOpcode(), dl, ResVT, Partial, N->getFlags());
}
SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
@@ -1810,6 +1884,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
SDValue Ch = MGT->getChain();
SDValue Ptr = MGT->getBasePtr();
SDValue Index = MGT->getIndex();
+ SDValue Scale = MGT->getScale();
SDValue Mask = MGT->getMask();
SDValue Src0 = MGT->getValue();
unsigned Alignment = MGT->getOriginalAlignment();
@@ -1842,7 +1917,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MGT->getAAInfo(), MGT->getRanges());
- SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo};
+ SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo, Scale};
SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl,
OpsLo, MMO);
@@ -1852,7 +1927,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
Alignment, MGT->getAAInfo(),
MGT->getRanges());
- SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi};
+ SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi, Scale};
SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl,
OpsHi, MMO);
@@ -1916,10 +1991,12 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
N->isCompressingStore());
- MMO = DAG.getMachineFunction().
- getMachineMemOperand(N->getPointerInfo(),
- MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
- SecondHalfAlignment, N->getAAInfo(), N->getRanges());
+ unsigned HiOffset = LoMemVT.getStoreSize();
+
+ MMO = DAG.getMachineFunction().getMachineMemOperand(
+ N->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOStore,
+ HiMemVT.getStoreSize(), SecondHalfAlignment, N->getAAInfo(),
+ N->getRanges());
Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
N->isTruncatingStore(), N->isCompressingStore());
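The functional change in this hunk (and in the matching masked-load hunk above) is getWithOffset(HiOffset): the second half of a split masked store or load starts LoMemVT.getStoreSize() bytes past the base pointer, and recording that offset in the MachineMemOperand describes the memory actually touched instead of reusing offset 0 for both halves. A tiny worked example, assuming a v8f32 access split into two v4f32 halves:

    #include <cstdio>

    int main() {
      unsigned NumLoElts = 4, EltBytes = 4;      // low half is v4f32
      unsigned HiOffset = NumLoElts * EltBytes;  // LoMemVT.getStoreSize() == 16
      std::printf("lo half covers [base + 0, base + %u)\n", HiOffset);
      std::printf("hi half covers [base + %u, base + %u)\n", HiOffset, 2 * HiOffset);
      return 0;
    }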
@@ -1935,6 +2012,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
SDValue Ptr = N->getBasePtr();
SDValue Mask = N->getMask();
SDValue Index = N->getIndex();
+ SDValue Scale = N->getScale();
SDValue Data = N->getValue();
EVT MemoryVT = N->getMemoryVT();
unsigned Alignment = N->getOriginalAlignment();
@@ -1970,7 +2048,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
Alignment, N->getAAInfo(), N->getRanges());
- SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo};
+ SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Scale};
Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
DL, OpsLo, MMO);
@@ -1982,7 +2060,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
// The order of the Scatter operation after split is well defined. The "Hi"
// part comes after the "Lo". So these two operations should be chained one
// after another.
- SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi};
+ SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi, Scale};
return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
DL, OpsHi, MMO);
}
@@ -2005,6 +2083,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+ // Scalarize if the split halves are not byte-sized.
+ if (!LoMemVT.isByteSized() || !HiMemVT.isByteSized())
+ return TLI.scalarizeVectorStore(N, DAG);
+
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
if (isTruncating)
@@ -2089,9 +2171,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
return SplitVecOp_UnaryOp(N);
SDLoc DL(N);
- // Extract the halves of the input via extract_subvector.
+ // Get the split input vector.
SDValue InLoVec, InHiVec;
- std::tie(InLoVec, InHiVec) = DAG.SplitVector(InVec, DL);
+ GetSplitVector(InVec, InLoVec, InHiVec);
// Truncate them to 1/2 the element size.
EVT HalfElementVT = IsFloat ?
EVT::getFloatingPointVT(InElementSize/2) :
@@ -2164,9 +2246,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_FCOPYSIGN(SDNode *N) {
//===----------------------------------------------------------------------===//
void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
- DEBUG(dbgs() << "Widen node result " << ResNo << ": ";
- N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Widen node result " << ResNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
// See if the target wants to custom widen this node.
if (CustomWidenLowerNode(N, N->getValueType(ResNo)))
@@ -2948,6 +3029,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {
SDValue Mask = N->getMask();
EVT MaskVT = Mask.getValueType();
SDValue Src0 = GetWidenedVector(N->getValue());
+ SDValue Scale = N->getScale();
unsigned NumElts = WideVT.getVectorNumElements();
SDLoc dl(N);
@@ -2963,7 +3045,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {
Index.getValueType().getScalarType(),
NumElts);
Index = ModifyToType(Index, WideIndexVT);
- SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
+ SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index, Scale };
SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other),
N->getMemoryVT(), dl, Ops,
N->getMemOperand());
@@ -3309,9 +3391,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
// Widen Vector Operand
//===----------------------------------------------------------------------===//
bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
- DEBUG(dbgs() << "Widen node operand " << OpNo << ": ";
- N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Widen node operand " << OpNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
SDValue Res = SDValue();
// See if the target wants to custom widen this node.
@@ -3420,7 +3501,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) {
// low lanes.
switch (N->getOpcode()) {
default:
- llvm_unreachable("Extend legalization on on extend operation!");
+ llvm_unreachable("Extend legalization on extend operation!");
case ISD::ANY_EXTEND:
return DAG.getAnyExtendVectorInReg(InOp, DL, VT);
case ISD::SIGN_EXTEND:
@@ -3544,6 +3625,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
// vector type.
StoreSDNode *ST = cast<StoreSDNode>(N);
+ if (!ST->getMemoryVT().getScalarType().isByteSized())
+ return TLI.scalarizeVectorStore(ST, DAG);
+
SmallVector<SDValue, 16> StChain;
if (ST->isTruncatingStore())
GenWidenVectorTruncStores(StChain, ST);
@@ -3587,6 +3671,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
SDValue DataOp = MSC->getValue();
SDValue Mask = MSC->getMask();
EVT MaskVT = Mask.getValueType();
+ SDValue Scale = MSC->getScale();
// Widen the value.
SDValue WideVal = GetWidenedVector(DataOp);
@@ -3606,7 +3691,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
NumElts);
Index = ModifyToType(Index, WideIndexVT);
- SDValue Ops[] = {MSC->getChain(), WideVal, Mask, MSC->getBasePtr(), Index};
+ SDValue Ops[] = {MSC->getChain(), WideVal, Mask, MSC->getBasePtr(), Index,
+ Scale};
return DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
MSC->getMemoryVT(), dl, Ops,
MSC->getMemOperand());
@@ -3616,6 +3702,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
SDValue InOp0 = GetWidenedVector(N->getOperand(0));
SDValue InOp1 = GetWidenedVector(N->getOperand(1));
SDLoc dl(N);
+ EVT VT = N->getValueType(0);
// WARNING: In this code we widen the compare instruction with garbage.
// This garbage may contain denormal floats which may be slow. Is this a real
@@ -3625,18 +3712,23 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
// Only some of the compared elements are legal.
EVT SVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
InOp0.getValueType());
+ // The result type is legal; if it's vXi1, keep vXi1 for the new SETCC.
+ if (VT.getScalarType() == MVT::i1)
+ SVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ SVT.getVectorNumElements());
+
SDValue WideSETCC = DAG.getNode(ISD::SETCC, SDLoc(N),
- SVT, InOp0, InOp1, N->getOperand(2));
+ SVT, InOp0, InOp1, N->getOperand(2));
// Extract the needed results from the result vector.
EVT ResVT = EVT::getVectorVT(*DAG.getContext(),
SVT.getVectorElementType(),
- N->getValueType(0).getVectorNumElements());
+ VT.getVectorNumElements());
SDValue CC = DAG.getNode(
ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC,
DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- return PromoteTargetBoolean(CC, N->getValueType(0));
+ return PromoteTargetBoolean(CC, VT);
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index cf92907a8b5f..7e6b57426338 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -32,7 +32,8 @@ public:
enum DbgValueKind {
SDNODE = 0, ///< Value is the result of an expression.
CONST = 1, ///< Value is a constant.
- FRAMEIX = 2 ///< Value is contents of a stack location.
+ FRAMEIX = 2, ///< Value is contents of a stack location.
+ VREG = 3 ///< Value is a virtual register.
};
private:
union {
@@ -42,6 +43,7 @@ private:
} s;
const Value *Const; ///< Valid for constants.
unsigned FrameIx; ///< Valid for stack objects.
+ unsigned VReg; ///< Valid for registers.
} u;
DIVariable *Var;
DIExpression *Expr;
@@ -69,12 +71,18 @@ public:
u.Const = C;
}
- /// Constructor for frame indices.
- SDDbgValue(DIVariable *Var, DIExpression *Expr, unsigned FI, DebugLoc dl,
- unsigned O)
- : Var(Var), Expr(Expr), DL(std::move(dl)), Order(O), IsIndirect(false) {
- kind = FRAMEIX;
- u.FrameIx = FI;
+ /// Constructor for virtual registers and frame indices.
+ SDDbgValue(DIVariable *Var, DIExpression *Expr, unsigned VRegOrFrameIdx,
+ bool IsIndirect, DebugLoc DL, unsigned Order,
+ enum DbgValueKind Kind)
+ : Var(Var), Expr(Expr), DL(DL), Order(Order), IsIndirect(IsIndirect) {
+ assert((Kind == VREG || Kind == FRAMEIX) &&
+ "Invalid SDDbgValue constructor");
+ kind = Kind;
+ if (kind == VREG)
+ u.VReg = VRegOrFrameIdx;
+ else
+ u.FrameIx = VRegOrFrameIdx;
}
/// Returns the kind.
@@ -98,6 +106,9 @@ public:
/// Returns the FrameIx for a stack object
unsigned getFrameIx() const { assert (kind==FRAMEIX); return u.FrameIx; }
+ /// Returns the Virtual Register for a VReg
+ unsigned getVReg() const { assert (kind==VREG); return u.VReg; }
+
/// Returns whether this is an indirect value.
bool isIndirect() const { return IsIndirect; }
@@ -115,6 +126,28 @@ public:
bool isInvalidated() const { return Invalid; }
};
+/// Holds the information from a dbg_label node through SDISel.
+/// We do not use SDValue here to avoid including its header.
+class SDDbgLabel {
+ MDNode *Label;
+ DebugLoc DL;
+ unsigned Order;
+
+public:
+ SDDbgLabel(MDNode *Label, DebugLoc dl, unsigned O)
+ : Label(Label), DL(std::move(dl)), Order(O) {}
+
+ /// Returns the MDNode pointer for the label.
+ MDNode *getLabel() const { return Label; }
+
+ /// Returns the DebugLoc.
+ DebugLoc getDebugLoc() const { return DL; }
+
+ /// Returns the SDNodeOrder. This is the order of the preceding node in the
+ /// input.
+ unsigned getOrder() const { return Order; }
+};
+
} // end llvm namespace
#endif
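With the merged constructor above, frame-index and virtual-register debug values differ only in the DbgValueKind tag, and getFrameIx()/getVReg() assert that the tag matches the union member being read. A sketch of how the two flavours might be constructed; Var, Expr, DL, Order, FI and VReg are assumed caller context, and in the tree these objects are normally allocated through the SelectionDAG allocator rather than on the stack:

    SDDbgValue FrameDV(Var, Expr, /*VRegOrFrameIdx=*/FI, /*IsIndirect=*/true,
                       DL, Order, SDDbgValue::FRAMEIX);
    SDDbgValue VRegDV(Var, Expr, /*VRegOrFrameIdx=*/VReg, /*IsIndirect=*/false,
                      DL, Order, SDDbgValue::VREG);
    assert(FrameDV.getFrameIx() == FI && "FRAMEIX tag selects u.FrameIx");
    assert(VRegDV.getVReg() == VReg && "VREG tag selects u.VReg");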
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 698e14453d1d..3944d7df286d 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -13,6 +13,7 @@
#include "InstrEmitter.h"
#include "ScheduleDAGSDNodes.h"
+#include "SDNodeDbgValue.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
@@ -115,7 +116,7 @@ private:
/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGFast::Schedule() {
- DEBUG(dbgs() << "********** List Scheduling **********\n");
+ LLVM_DEBUG(dbgs() << "********** List Scheduling **********\n");
NumLiveRegs = 0;
LiveRegDefs.resize(TRI->getNumRegs(), nullptr);
@@ -124,8 +125,8 @@ void ScheduleDAGFast::Schedule() {
// Build the scheduling graph.
BuildSchedGraph(nullptr);
- DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
- SUnits[su].dumpAll(this));
+ LLVM_DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su]
+ .dumpAll(this));
// Execute the actual scheduling loop.
ListScheduleBottomUp();
@@ -180,8 +181,8 @@ void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
- DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
- DEBUG(SU->dump(this));
+ LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ LLVM_DEBUG(SU->dump(this));
assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
SU->setHeightToAtLeast(CurCycle);
@@ -236,7 +237,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
return nullptr;
- DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
+ LLVM_DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
assert(NewNodes.size() == 2 && "Expected a load folding node!");
N = NewNodes[1];
@@ -346,7 +347,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
SU = NewSU;
}
- DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n");
+ LLVM_DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n");
NewSU = Clone(SU);
// New SUnit has the exact same predecessors.
@@ -592,14 +593,14 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
// Issue copies, these can be expensive cross register class copies.
SmallVector<SUnit*, 2> Copies;
InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
- DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum
- << " to SU #" << Copies.front()->NodeNum << "\n");
+ LLVM_DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum
+ << " to SU #" << Copies.front()->NodeNum << "\n");
AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
NewDef = Copies.back();
}
- DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum
- << " to SU #" << TrySU->NodeNum << "\n");
+ LLVM_DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum
+ << " to SU #" << TrySU->NodeNum << "\n");
LiveRegDefs[Reg] = NewDef;
AddPred(NewDef, SDep(TrySU, SDep::Artificial));
TrySU->isAvailable = false;
@@ -666,8 +667,8 @@ void ScheduleDAGLinearize::ScheduleNode(SDNode *N) {
// These nodes do not need to be translated into MIs.
return;
- DEBUG(dbgs() << "\n*** Scheduling: ");
- DEBUG(N->dump(DAG));
+ LLVM_DEBUG(dbgs() << "\n*** Scheduling: ");
+ LLVM_DEBUG(N->dump(DAG));
Sequence.push_back(N);
unsigned NumOps = N->getNumOperands();
@@ -713,7 +714,7 @@ static SDNode *findGluedUser(SDNode *N) {
}
void ScheduleDAGLinearize::Schedule() {
- DEBUG(dbgs() << "********** DAG Linearization **********\n");
+ LLVM_DEBUG(dbgs() << "********** DAG Linearization **********\n");
SmallVector<SDNode*, 8> Glues;
unsigned DAGSize = 0;
@@ -763,19 +764,29 @@ ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
InstrEmitter Emitter(BB, InsertPos);
DenseMap<SDValue, unsigned> VRBaseMap;
- DEBUG({
- dbgs() << "\n*** Final schedule ***\n";
- });
+ LLVM_DEBUG({ dbgs() << "\n*** Final schedule ***\n"; });
- // FIXME: Handle dbg_values.
unsigned NumNodes = Sequence.size();
+ MachineBasicBlock *BB = Emitter.getBlock();
for (unsigned i = 0; i != NumNodes; ++i) {
SDNode *N = Sequence[NumNodes-i-1];
- DEBUG(N->dump(DAG));
+ LLVM_DEBUG(N->dump(DAG));
Emitter.EmitNode(N, false, false, VRBaseMap);
+
+ // Emit any debug values associated with the node.
+ if (N->getHasDebugValue()) {
+ MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
+ for (auto DV : DAG->GetDbgValues(N)) {
+ if (DV->isInvalidated())
+ continue;
+ if (auto *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap))
+ BB->insert(InsertPos, DbgMI);
+ DV->setIsInvalidated();
+ }
+ }
}
- DEBUG(dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << '\n');
InsertPos = Emitter.getInsertPos();
return Emitter.getBlock();
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 82337d43c5c9..43e8ffd3839c 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -26,7 +26,6 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
@@ -37,6 +36,7 @@
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -46,6 +46,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -346,8 +347,8 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
- DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
- << " '" << BB->getName() << "' **********\n");
+ LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
+ << " '" << BB->getName() << "' **********\n");
CurCycle = 0;
IssueCount = 0;
@@ -364,8 +365,7 @@ void ScheduleDAGRRList::Schedule() {
// Build the scheduling graph.
BuildSchedGraph(nullptr);
- DEBUG(for (SUnit &SU : SUnits)
- SU.dumpAll(this));
+ LLVM_DEBUG(for (SUnit &SU : SUnits) SU.dumpAll(this));
Topo.InitDAGTopologicalSorting();
AvailableQueue->initNodes(SUnits);
@@ -377,11 +377,11 @@ void ScheduleDAGRRList::Schedule() {
AvailableQueue->releaseState();
- DEBUG({
- dbgs() << "*** Final schedule ***\n";
- dumpSchedule();
- dbgs() << '\n';
- });
+ LLVM_DEBUG({
+ dbgs() << "*** Final schedule ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
}
//===----------------------------------------------------------------------===//
@@ -728,13 +728,13 @@ static void resetVRegCycle(SUnit *SU);
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
- DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
- DEBUG(SU->dump(this));
+ LLVM_DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
+ LLVM_DEBUG(SU->dump(this));
#ifndef NDEBUG
if (CurCycle < SU->getHeight())
- DEBUG(dbgs() << " Height [" << SU->getHeight()
- << "] pipeline stall!\n");
+ LLVM_DEBUG(dbgs() << " Height [" << SU->getHeight()
+ << "] pipeline stall!\n");
#endif
// FIXME: Do not modify node height. It may interfere with
@@ -827,8 +827,8 @@ void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
/// its predecessor states to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
- DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
- DEBUG(SU->dump(this));
+ LLVM_DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
+ LLVM_DEBUG(SU->dump(this));
for (SDep &Pred : SU->Preds) {
CapturePred(&Pred);
@@ -1010,7 +1010,35 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
computeLatency(LoadSU);
}
- DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
+ bool isNewN = true;
+ SUnit *NewSU;
+ // This can only happen when isNewLoad is false.
+ if (N->getNodeId() != -1) {
+ NewSU = &SUnits[N->getNodeId()];
+ // If NewSU has already been scheduled, we need to clone it, but this
+ // negates the benefit to unfolding so just return SU.
+ if (NewSU->isScheduled)
+ return SU;
+ isNewN = false;
+ } else {
+ NewSU = CreateNewSUnit(N);
+ N->setNodeId(NewSU->NodeNum);
+
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+ if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
+ NewSU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (MCID.isCommutable())
+ NewSU->isCommutable = true;
+
+ InitNumRegDefsLeft(NewSU);
+ computeLatency(NewSU);
+ }
+
+ LLVM_DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
// Now that we are committed to unfolding replace DAG Uses.
for (unsigned i = 0; i != NumVals; ++i)
@@ -1018,23 +1046,6 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1),
SDValue(LoadNode, 1));
- SUnit *NewSU = CreateNewSUnit(N);
- assert(N->getNodeId() == -1 && "Node already inserted!");
- N->setNodeId(NewSU->NodeNum);
-
- const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
- for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
- if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
- NewSU->isTwoAddress = true;
- break;
- }
- }
- if (MCID.isCommutable())
- NewSU->isCommutable = true;
-
- InitNumRegDefsLeft(NewSU);
- computeLatency(NewSU);
-
// Record all the edges to and from the old SU, by category.
SmallVector<SDep, 4> ChainPreds;
SmallVector<SDep, 4> ChainSuccs;
@@ -1100,7 +1111,8 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
if (isNewLoad)
AvailableQueue->addNode(LoadSU);
- AvailableQueue->addNode(NewSU);
+ if (isNewN)
+ AvailableQueue->addNode(NewSU);
++NumUnfolds;
@@ -1117,12 +1129,13 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
if (!N)
return nullptr;
- DEBUG(dbgs() << "Considering duplicating the SU\n");
- DEBUG(SU->dump(this));
+ LLVM_DEBUG(dbgs() << "Considering duplicating the SU\n");
+ LLVM_DEBUG(SU->dump(this));
if (N->getGluedNode() &&
!TII->canCopyGluedNodeDuringSchedule(N)) {
- DEBUG(dbgs()
+ LLVM_DEBUG(
+ dbgs()
<< "Giving up because it has incoming glue and the target does not "
"want to copy it\n");
return nullptr;
@@ -1133,7 +1146,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
MVT VT = N->getSimpleValueType(i);
if (VT == MVT::Glue) {
- DEBUG(dbgs() << "Giving up because it has outgoing glue\n");
+ LLVM_DEBUG(dbgs() << "Giving up because it has outgoing glue\n");
return nullptr;
} else if (VT == MVT::Other)
TryUnfold = true;
@@ -1141,8 +1154,9 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
for (const SDValue &Op : N->op_values()) {
MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
if (VT == MVT::Glue && !TII->canCopyGluedNodeDuringSchedule(N)) {
- DEBUG(dbgs() << "Giving up because it one of the operands is glue and "
- "the target does not want to copy it\n");
+ LLVM_DEBUG(
+ dbgs() << "Giving up because one of the operands is glue and "
+ "the target does not want to copy it\n");
return nullptr;
}
}
@@ -1159,7 +1173,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
return SU;
}
- DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
+ LLVM_DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
NewSU = CreateClone(SU);
// New SUnit has the exact same predecessors.
@@ -1420,7 +1434,7 @@ void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
// Furthermore, it may have been made available again, in which case it is
// now already in the AvailableQueue.
if (SU->isAvailable && !SU->NodeQueueId) {
- DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n');
+ LLVM_DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n');
AvailableQueue->push(SU);
}
if (i < Interferences.size())
@@ -1441,12 +1455,10 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
SmallVector<unsigned, 4> LRegs;
if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
break;
- DEBUG(dbgs() << " Interfering reg ";
- if (LRegs[0] == TRI->getNumRegs())
- dbgs() << "CallResource";
- else
- dbgs() << printReg(LRegs[0], TRI);
- dbgs() << " SU #" << CurSU->NodeNum << '\n');
+ LLVM_DEBUG(dbgs() << " Interfering reg ";
+ if (LRegs[0] == TRI->getNumRegs()) dbgs() << "CallResource";
+ else dbgs() << printReg(LRegs[0], TRI);
+ dbgs() << " SU #" << CurSU->NodeNum << '\n');
std::pair<LRegsMapT::iterator, bool> LRegsPair =
LRegsMap.insert(std::make_pair(CurSU, LRegs));
if (LRegsPair.second) {
@@ -1492,17 +1504,17 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
if (!BtSU->isPending)
AvailableQueue->remove(BtSU);
}
- DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum << ") to SU("
- << TrySU->NodeNum << ")\n");
+ LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum
+ << ") to SU(" << TrySU->NodeNum << ")\n");
AddPred(TrySU, SDep(BtSU, SDep::Artificial));
// If one or more successors has been unscheduled, then the current
// node is no longer available.
if (!TrySU->isAvailable || !TrySU->NodeQueueId) {
- DEBUG(dbgs() << "TrySU not available; choosing node from queue\n");
+ LLVM_DEBUG(dbgs() << "TrySU not available; choosing node from queue\n");
CurSU = AvailableQueue->pop();
} else {
- DEBUG(dbgs() << "TrySU available\n");
+ LLVM_DEBUG(dbgs() << "TrySU available\n");
// Available and in AvailableQueue
AvailableQueue->remove(TrySU);
CurSU = TrySU;
@@ -1546,14 +1558,14 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
// Issue copies, these can be expensive cross register class copies.
SmallVector<SUnit*, 2> Copies;
InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
- DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
- << " to SU #" << Copies.front()->NodeNum << "\n");
+ LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
+ << " to SU #" << Copies.front()->NodeNum << "\n");
AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
NewDef = Copies.back();
}
- DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
- << " to SU #" << TrySU->NodeNum << "\n");
+ LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
+ << " to SU #" << TrySU->NodeNum << "\n");
LiveRegDefs[Reg] = NewDef;
AddPred(NewDef, SDep(TrySU, SDep::Artificial));
TrySU->isAvailable = false;
@@ -1581,8 +1593,8 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
// priority. If it is not ready put it back. Schedule the node.
Sequence.reserve(SUnits.size());
while (!AvailableQueue->empty() || !Interferences.empty()) {
- DEBUG(dbgs() << "\nExamining Available:\n";
- AvailableQueue->dump(this));
+ LLVM_DEBUG(dbgs() << "\nExamining Available:\n";
+ AvailableQueue->dump(this));
// Pick the best node to schedule taking all constraints into
// consideration.
@@ -2045,8 +2057,8 @@ LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const {
unsigned Id = RC->getID();
unsigned RP = RegPressure[Id];
if (!RP) continue;
- DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
- << RegLimit[Id] << '\n');
+ LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
+ << RegLimit[Id] << '\n');
}
}
#endif
@@ -2198,14 +2210,15 @@ void RegReductionPQBase::scheduledNode(SUnit *SU) {
if (RegPressure[RCId] < Cost) {
// Register pressure tracking is imprecise. This can happen. But we try
// hard not to let it happen because it likely results in poor scheduling.
- DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n");
+ LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum
+ << ") has too many regdefs\n");
RegPressure[RCId] = 0;
}
else {
RegPressure[RCId] -= Cost;
}
}
- DEBUG(dumpRegPressure());
+ LLVM_DEBUG(dumpRegPressure());
}
void RegReductionPQBase::unscheduledNode(SUnit *SU) {
@@ -2285,7 +2298,7 @@ void RegReductionPQBase::unscheduledNode(SUnit *SU) {
}
}
- DEBUG(dumpRegPressure());
+ LLVM_DEBUG(dumpRegPressure());
}
//===----------------------------------------------------------------------===//
@@ -2380,7 +2393,7 @@ static void initVRegCycle(SUnit *SU) {
if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
return;
- DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
+ LLVM_DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
SU->isVRegCycle = true;
@@ -2418,7 +2431,7 @@ static bool hasVRegCycleUse(const SUnit *SU) {
if (Pred.isCtrl()) continue; // ignore chain preds
if (Pred.getSUnit()->isVRegCycle &&
Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
- DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
+ LLVM_DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
return true;
}
}
@@ -2478,9 +2491,9 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
int LDepth = left->getDepth() - LPenalty;
int RDepth = right->getDepth() - RPenalty;
if (LDepth != RDepth) {
- DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
- << ") depth " << LDepth << " vs SU (" << right->NodeNum
- << ") depth " << RDepth << "\n");
+ LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
+ << ") depth " << LDepth << " vs SU (" << right->NodeNum
+ << ") depth " << RDepth << "\n");
return LDepth < RDepth ? 1 : -1;
}
if (left->Latency != right->Latency)
@@ -2502,9 +2515,9 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
static const char *const PhysRegMsg[] = { " has no physreg",
" defines a physreg" };
#endif
- DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
- << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
- << PhysRegMsg[RHasPhysReg] << "\n");
+ LLVM_DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
+ << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum
+ << ") " << PhysRegMsg[RHasPhysReg] << "\n");
return LHasPhysReg < RHasPhysReg;
}
}
@@ -2648,13 +2661,13 @@ bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
// Avoid causing spills. If register pressure is high, schedule for
// register pressure reduction.
if (LHigh && !RHigh) {
- DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
- << right->NodeNum << ")\n");
+ LLVM_DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
+ << right->NodeNum << ")\n");
return true;
}
else if (!LHigh && RHigh) {
- DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
- << left->NodeNum << ")\n");
+ LLVM_DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
+ << left->NodeNum << ")\n");
return false;
}
if (!LHigh && !RHigh) {
@@ -2716,8 +2729,9 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
}
if (!DisableSchedRegPressure && LPDiff != RPDiff) {
- DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
- << " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
+ LLVM_DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum
+ << "): " << LPDiff << " != SU(" << right->NodeNum
+ << "): " << RPDiff << "\n");
return LPDiff > RPDiff;
}
@@ -2729,8 +2743,9 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
}
if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
- DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
- << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
+ LLVM_DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
+ << " != SU(" << right->NodeNum << "): " << RLiveUses
+ << "\n");
return LLiveUses < RLiveUses;
}
@@ -2744,9 +2759,9 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
if (!DisableSchedCriticalPath) {
int spread = (int)left->getDepth() - (int)right->getDepth();
if (std::abs(spread) > MaxReorderWindow) {
- DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
- << left->getDepth() << " != SU(" << right->NodeNum << "): "
- << right->getDepth() << "\n");
+ LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
+ << left->getDepth() << " != SU(" << right->NodeNum
+ << "): " << right->getDepth() << "\n");
return left->getDepth() < right->getDepth();
}
}
@@ -2967,9 +2982,10 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
// Ok, the transformation is safe and the heuristics suggest it is
// profitable. Update the graph.
- DEBUG(dbgs() << " Prescheduling SU #" << SU.NodeNum
- << " next to PredSU #" << PredSU->NodeNum
- << " to guide scheduling in the presence of multiple uses\n");
+ LLVM_DEBUG(
+ dbgs() << " Prescheduling SU #" << SU.NodeNum << " next to PredSU #"
+ << PredSU->NodeNum
+ << " to guide scheduling in the presence of multiple uses\n");
for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
SDep Edge = PredSU->Succs[i];
assert(!Edge.isAssignedRegDep());
@@ -3058,8 +3074,9 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() {
(isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
(!SU.isCommutable && SuccSU->isCommutable)) &&
!scheduleDAG->IsReachable(SuccSU, &SU)) {
- DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #"
- << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
+ LLVM_DEBUG(dbgs()
+ << " Adding a pseudo-two-addr edge from SU #"
+ << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
scheduleDAG->AddPred(&SU, SDep(SuccSU, SDep::Artificial));
}
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index c09b47af26a6..430d8fb34476 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -243,7 +244,7 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
return;
// Sort them in increasing order.
- std::sort(Offsets.begin(), Offsets.end());
+ llvm::sort(Offsets.begin(), Offsets.end());
// Check if the loads are close enough.
SmallVector<SDNode*, 4> Loads;
@@ -910,6 +911,39 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
MachineBasicBlock *InsertBB = Emitter.getBlock();
MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator();
InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end());
+
+ SDDbgInfo::DbgLabelIterator DLI = DAG->DbgLabelBegin();
+ SDDbgInfo::DbgLabelIterator DLE = DAG->DbgLabelEnd();
+ // Now emit the rest according to source order.
+ LastOrder = 0;
+ for (const auto &InstrOrder : Orders) {
+ unsigned Order = InstrOrder.first;
+ MachineInstr *MI = InstrOrder.second;
+ if (!MI)
+ continue;
+
+ // Insert all SDDbgLabel's whose order(s) are before "Order".
+ for (; DLI != DLE &&
+ (*DLI)->getOrder() >= LastOrder && (*DLI)->getOrder() < Order;
+ ++DLI) {
+ MachineInstr *DbgMI = Emitter.EmitDbgLabel(*DLI);
+ if (DbgMI) {
+ if (!LastOrder)
+ // Insert to start of the BB (after PHIs).
+ BB->insert(BBBegin, DbgMI);
+ else {
+ // Insert at the instruction, which may be in a different
+ // block, if the block was split by a custom inserter.
+ MachineBasicBlock::iterator Pos = MI;
+ MI->getParent()->insert(Pos, DbgMI);
+ }
+ }
+ }
+ if (DLI == DLE)
+ break;
+
+ LastOrder = Order;
+ }
}
InsertPos = Emitter.getInsertPos();
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index a058942c5689..6417e16bd0fd 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -17,10 +17,10 @@
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/MachineValueType.h"
#include <cassert>
#include <string>
#include <vector>
@@ -88,7 +88,7 @@ class InstrItineraryData;
/// Clone - Creates a clone of the specified SUnit. It does not copy the
/// predecessors / successors info nor the temporary scheduling states.
///
- SUnit *Clone(SUnit *N);
+ SUnit *Clone(SUnit *Old);
/// BuildSchedGraph - Build the SUnit graph from the selection dag that we
/// are input. This SUnit graph is similar to the SelectionDAG, but
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index 07b46b9183ab..84055f8ecc1a 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -93,8 +93,8 @@ private:
/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGVLIW::Schedule() {
- DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
- << " '" << BB->getName() << "' **********\n");
+ LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
+ << " '" << BB->getName() << "' **********\n");
// Build the scheduling graph.
BuildSchedGraph(AA);
@@ -151,8 +151,8 @@ void ScheduleDAGVLIW::releaseSuccessors(SUnit *SU) {
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGVLIW::scheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
- DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
- DEBUG(SU->dump(this));
+ LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ LLVM_DEBUG(SU->dump(this));
Sequence.push_back(SU);
assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
@@ -246,7 +246,7 @@ void ScheduleDAGVLIW::listScheduleTopDown() {
} else if (!HasNoopHazards) {
// Otherwise, we have a pipeline stall, but no other problem, just advance
// the current cycle and try again.
- DEBUG(dbgs() << "*** Advancing cycle, no work to do\n");
+ LLVM_DEBUG(dbgs() << "*** Advancing cycle, no work to do\n");
HazardRec->AdvanceCycle();
++NumStalls;
++CurCycle;
@@ -254,7 +254,7 @@ void ScheduleDAGVLIW::listScheduleTopDown() {
// Otherwise, we have no instructions to issue and we have instructions
// that will fault if we don't do this right. This is the case for
// processors without pipeline interlocks and other cases.
- DEBUG(dbgs() << "*** Emitting noop\n");
+ LLVM_DEBUG(dbgs() << "*** Emitting noop\n");
HazardRec->EmitNoop();
Sequence.push_back(nullptr); // NULL here means noop
++NumNoops;
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 3ffc6fa9a059..48e03c6da68f 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -32,7 +32,6 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -58,6 +57,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
+#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Mutex.h"
@@ -89,11 +89,16 @@ void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}
#define DEBUG_TYPE "selectiondag"
+static cl::opt<bool> EnableMemCpyDAGOpt("enable-memcpy-dag-opt",
+ cl::Hidden, cl::init(true),
+ cl::desc("Gang up loads and stores generated by inlining of memcpy"));
+
+static cl::opt<int> MaxLdStGlue("ldstmemcpy-glue-max",
+ cl::desc("Number limit for gluing ld/st of memcpy."),
+ cl::Hidden, cl::init(0));
+
static void NewSDValueDbgMsg(SDValue V, StringRef Msg, SelectionDAG *G) {
- DEBUG(
- dbgs() << Msg;
- V.getNode()->dump(G);
- );
+ LLVM_DEBUG(dbgs() << Msg; V.getNode()->dump(G););
}
//===----------------------------------------------------------------------===//
@@ -263,6 +268,52 @@ bool ISD::allOperandsUndef(const SDNode *N) {
return true;
}
+bool ISD::matchUnaryPredicate(SDValue Op,
+ std::function<bool(ConstantSDNode *)> Match) {
+ if (auto *Cst = dyn_cast<ConstantSDNode>(Op))
+ return Match(Cst);
+
+ if (ISD::BUILD_VECTOR != Op.getOpcode())
+ return false;
+
+ EVT SVT = Op.getValueType().getScalarType();
+ for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
+ auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i));
+ if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst))
+ return false;
+ }
+ return true;
+}
+
+bool ISD::matchBinaryPredicate(
+ SDValue LHS, SDValue RHS,
+ std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match) {
+ if (LHS.getValueType() != RHS.getValueType())
+ return false;
+
+ if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS))
+ if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS))
+ return Match(LHSCst, RHSCst);
+
+ if (ISD::BUILD_VECTOR != LHS.getOpcode() ||
+ ISD::BUILD_VECTOR != RHS.getOpcode())
+ return false;
+
+ EVT SVT = LHS.getValueType().getScalarType();
+ for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
+ auto *LHSCst = dyn_cast<ConstantSDNode>(LHS.getOperand(i));
+ auto *RHSCst = dyn_cast<ConstantSDNode>(RHS.getOperand(i));
+ if (!LHSCst || !RHSCst)
+ return false;
+ if (LHSCst->getValueType(0) != SVT ||
+ LHSCst->getValueType(0) != RHSCst->getValueType(0))
+ return false;
+ if (!Match(LHSCst, RHSCst))
+ return false;
+ }
+ return true;
+}
+
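The two helpers above let a combine treat a lone ConstantSDNode and a BUILD_VECTOR of constants uniformly: the predicate must hold for the scalar, or for every vector element, with type mismatches rejected. A sketch of the intended use; N1 and VT are assumed DAGCombiner context, not names from this patch:

    // Accepts either "shl x, 3" or "shl x, <3, 1, 7, 2>" style shift amounts.
    bool AmtInRange = ISD::matchUnaryPredicate(N1, [&](ConstantSDNode *C) {
      return C->getAPIntValue().ult(VT.getScalarSizeInBits());
    });
    if (AmtInRange) {
      // Every per-lane shift amount is known to be smaller than the bit width,
      // so the fold guarded by this check cannot create an oversized shift.
    }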
ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) {
switch (ExtType) {
case ISD::EXTLOAD:
@@ -487,12 +538,41 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(ST->getPointerInfo().getAddrSpace());
break;
}
+ case ISD::MLOAD: {
+ const MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
+ ID.AddInteger(MLD->getMemoryVT().getRawBits());
+ ID.AddInteger(MLD->getRawSubclassData());
+ ID.AddInteger(MLD->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::MSTORE: {
+ const MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
+ ID.AddInteger(MST->getMemoryVT().getRawBits());
+ ID.AddInteger(MST->getRawSubclassData());
+ ID.AddInteger(MST->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::MGATHER: {
+ const MaskedGatherSDNode *MG = cast<MaskedGatherSDNode>(N);
+ ID.AddInteger(MG->getMemoryVT().getRawBits());
+ ID.AddInteger(MG->getRawSubclassData());
+ ID.AddInteger(MG->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::MSCATTER: {
+ const MaskedScatterSDNode *MS = cast<MaskedScatterSDNode>(N);
+ ID.AddInteger(MS->getMemoryVT().getRawBits());
+ ID.AddInteger(MS->getRawSubclassData());
+ ID.AddInteger(MS->getPointerInfo().getAddrSpace());
+ break;
+ }
case ISD::ATOMIC_CMP_SWAP:
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
case ISD::ATOMIC_SWAP:
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_CLR:
case ISD::ATOMIC_LOAD_OR:
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_NAND:
@@ -726,7 +806,7 @@ static void VerifySDNode(SDNode *N) {
}
#endif // NDEBUG
-/// \brief Insert a newly allocated node into the DAG.
+/// Insert a newly allocated node into the DAG.
///
/// Handles insertion into the all nodes list and CSE map, as well as
/// verification and other common operations when a new node is allocated.
@@ -903,13 +983,16 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
void SelectionDAG::init(MachineFunction &NewMF,
OptimizationRemarkEmitter &NewORE,
- Pass *PassPtr) {
+ Pass *PassPtr, const TargetLibraryInfo *LibraryInfo,
+ DivergenceAnalysis * Divergence) {
MF = &NewMF;
SDAGISelPass = PassPtr;
ORE = &NewORE;
TLI = getSubtarget().getTargetLowering();
TSI = getSubtarget().getSelectionDAGInfo();
+ LibInfo = LibraryInfo;
Context = &MF->getFunction().getContext();
+ DA = Divergence;
}
SelectionDAG::~SelectionDAG() {
@@ -1077,21 +1160,25 @@ SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) {
}
SDValue SelectionDAG::getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT) {
- EVT EltVT = VT.getScalarType();
- SDValue TrueValue;
- switch (TLI->getBooleanContents(VT)) {
- case TargetLowering::ZeroOrOneBooleanContent:
- case TargetLowering::UndefinedBooleanContent:
- TrueValue = getConstant(1, DL, VT);
- break;
- case TargetLowering::ZeroOrNegativeOneBooleanContent:
- TrueValue = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL,
- VT);
- break;
- }
+ SDValue TrueValue = getBoolConstant(true, DL, VT, VT);
return getNode(ISD::XOR, DL, VT, Val, TrueValue);
}
+SDValue SelectionDAG::getBoolConstant(bool V, const SDLoc &DL, EVT VT,
+ EVT OpVT) {
+ if (!V)
+ return getConstant(0, DL, VT);
+
+ switch (TLI->getBooleanContents(OpVT)) {
+ case TargetLowering::ZeroOrOneBooleanContent:
+ case TargetLowering::UndefinedBooleanContent:
+ return getConstant(1, DL, VT);
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ return getAllOnesConstant(DL, VT);
+ }
+ llvm_unreachable("Unexpected boolean content enum!");
+}
+
SDValue SelectionDAG::getConstant(uint64_t Val, const SDLoc &DL, EVT VT,
bool isT, bool isO) {
EVT EltVT = VT.getScalarType();
@@ -1184,7 +1271,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
return SDValue(N, 0);
if (!N) {
- N = newSDNode<ConstantSDNode>(isT, isO, Elt, DL.getDebugLoc(), EltVT);
+ N = newSDNode<ConstantSDNode>(isT, isO, Elt, EltVT);
CSEMap.InsertNode(N, IP);
InsertNode(N);
NewSDValueDbgMsg(SDValue(N, 0), "Creating constant: ", this);
@@ -1227,7 +1314,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL,
return SDValue(N, 0);
if (!N) {
- N = newSDNode<ConstantFPSDNode>(isTarget, &V, DL.getDebugLoc(), EltVT);
+ N = newSDNode<ConstantFPSDNode>(isTarget, &V, EltVT);
CSEMap.InsertNode(N, IP);
InsertNode(N);
}
@@ -1503,33 +1590,35 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
if (N1.isUndef())
commuteShuffle(N1, N2, MaskVec);
- // If shuffling a splat, try to blend the splat instead. We do this here so
- // that even when this arises during lowering we don't have to re-handle it.
- auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) {
- BitVector UndefElements;
- SDValue Splat = BV->getSplatValue(&UndefElements);
- if (!Splat)
- return;
+ if (TLI->hasVectorBlend()) {
+ // If shuffling a splat, try to blend the splat instead. We do this here so
+ // that even when this arises during lowering we don't have to re-handle it.
+ auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) {
+ BitVector UndefElements;
+ SDValue Splat = BV->getSplatValue(&UndefElements);
+ if (!Splat)
+ return;
- for (int i = 0; i < NElts; ++i) {
- if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + NElts))
- continue;
+ for (int i = 0; i < NElts; ++i) {
+ if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + NElts))
+ continue;
- // If this input comes from undef, mark it as such.
- if (UndefElements[MaskVec[i] - Offset]) {
- MaskVec[i] = -1;
- continue;
- }
+ // If this input comes from undef, mark it as such.
+ if (UndefElements[MaskVec[i] - Offset]) {
+ MaskVec[i] = -1;
+ continue;
+ }
- // If we can blend a non-undef lane, use that instead.
- if (!UndefElements[i])
- MaskVec[i] = i + Offset;
- }
- };
- if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1))
- BlendSplat(N1BV, 0);
- if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2))
- BlendSplat(N2BV, NElts);
+ // If we can blend a non-undef lane, use that instead.
+ if (!UndefElements[i])
+ MaskVec[i] = i + Offset;
+ }
+ };
+ if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1))
+ BlendSplat(N1BV, 0);
+ if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2))
+ BlendSplat(N2BV, NElts);
+ }
// Canonicalize all index into lhs, -> shuffle lhs, undef
// Canonicalize all index into rhs, -> shuffle rhs, undef
@@ -1643,7 +1732,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
}
SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) {
- MVT VT = SV.getSimpleValueType(0);
+ EVT VT = SV.getValueType(0);
SmallVector<int, 8> MaskVec(SV.getMask().begin(), SV.getMask().end());
ShuffleVectorSDNode::commuteMask(MaskVec);
@@ -1661,6 +1750,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
return SDValue(E, 0);
auto *N = newSDNode<RegisterSDNode>(RegNo, VT);
+ N->SDNodeBits.IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, DA);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
@@ -1870,19 +1960,15 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
ISD::CondCode Cond, const SDLoc &dl) {
+ EVT OpVT = N1.getValueType();
+
// These setcc operations always fold.
switch (Cond) {
default: break;
case ISD::SETFALSE:
- case ISD::SETFALSE2: return getConstant(0, dl, VT);
+ case ISD::SETFALSE2: return getBoolConstant(false, dl, VT, OpVT);
case ISD::SETTRUE:
- case ISD::SETTRUE2: {
- TargetLowering::BooleanContent Cnt =
- TLI->getBooleanContents(N1->getValueType(0));
- return getConstant(
- Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, dl,
- VT);
- }
+ case ISD::SETTRUE2: return getBoolConstant(true, dl, VT, OpVT);
case ISD::SETOEQ:
case ISD::SETOGT:
@@ -1905,16 +1991,16 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
switch (Cond) {
default: llvm_unreachable("Unknown integer setcc!");
- case ISD::SETEQ: return getConstant(C1 == C2, dl, VT);
- case ISD::SETNE: return getConstant(C1 != C2, dl, VT);
- case ISD::SETULT: return getConstant(C1.ult(C2), dl, VT);
- case ISD::SETUGT: return getConstant(C1.ugt(C2), dl, VT);
- case ISD::SETULE: return getConstant(C1.ule(C2), dl, VT);
- case ISD::SETUGE: return getConstant(C1.uge(C2), dl, VT);
- case ISD::SETLT: return getConstant(C1.slt(C2), dl, VT);
- case ISD::SETGT: return getConstant(C1.sgt(C2), dl, VT);
- case ISD::SETLE: return getConstant(C1.sle(C2), dl, VT);
- case ISD::SETGE: return getConstant(C1.sge(C2), dl, VT);
+ case ISD::SETEQ: return getBoolConstant(C1 == C2, dl, VT, OpVT);
+ case ISD::SETNE: return getBoolConstant(C1 != C2, dl, VT, OpVT);
+ case ISD::SETULT: return getBoolConstant(C1.ult(C2), dl, VT, OpVT);
+ case ISD::SETUGT: return getBoolConstant(C1.ugt(C2), dl, VT, OpVT);
+ case ISD::SETULE: return getBoolConstant(C1.ule(C2), dl, VT, OpVT);
+ case ISD::SETUGE: return getBoolConstant(C1.uge(C2), dl, VT, OpVT);
+ case ISD::SETLT: return getBoolConstant(C1.slt(C2), dl, VT, OpVT);
+ case ISD::SETGT: return getBoolConstant(C1.sgt(C2), dl, VT, OpVT);
+ case ISD::SETLE: return getBoolConstant(C1.sle(C2), dl, VT, OpVT);
+ case ISD::SETGE: return getBoolConstant(C1.sge(C2), dl, VT, OpVT);
}
}
}
@@ -1926,41 +2012,54 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
case ISD::SETEQ: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
LLVM_FALLTHROUGH;
- case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, dl, VT);
+ case ISD::SETOEQ: return getBoolConstant(R==APFloat::cmpEqual, dl, VT,
+ OpVT);
case ISD::SETNE: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
LLVM_FALLTHROUGH;
- case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan ||
- R==APFloat::cmpLessThan, dl, VT);
+ case ISD::SETONE: return getBoolConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpLessThan, dl, VT,
+ OpVT);
case ISD::SETLT: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
LLVM_FALLTHROUGH;
- case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, dl, VT);
+ case ISD::SETOLT: return getBoolConstant(R==APFloat::cmpLessThan, dl, VT,
+ OpVT);
case ISD::SETGT: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
LLVM_FALLTHROUGH;
- case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, dl, VT);
+ case ISD::SETOGT: return getBoolConstant(R==APFloat::cmpGreaterThan, dl,
+ VT, OpVT);
case ISD::SETLE: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
LLVM_FALLTHROUGH;
- case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan ||
- R==APFloat::cmpEqual, dl, VT);
+ case ISD::SETOLE: return getBoolConstant(R==APFloat::cmpLessThan ||
+ R==APFloat::cmpEqual, dl, VT,
+ OpVT);
case ISD::SETGE: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
LLVM_FALLTHROUGH;
- case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan ||
- R==APFloat::cmpEqual, dl, VT);
- case ISD::SETO: return getConstant(R!=APFloat::cmpUnordered, dl, VT);
- case ISD::SETUO: return getConstant(R==APFloat::cmpUnordered, dl, VT);
- case ISD::SETUEQ: return getConstant(R==APFloat::cmpUnordered ||
- R==APFloat::cmpEqual, dl, VT);
- case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, dl, VT);
- case ISD::SETULT: return getConstant(R==APFloat::cmpUnordered ||
- R==APFloat::cmpLessThan, dl, VT);
- case ISD::SETUGT: return getConstant(R==APFloat::cmpGreaterThan ||
- R==APFloat::cmpUnordered, dl, VT);
- case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, dl, VT);
- case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, dl, VT);
+ case ISD::SETOGE: return getBoolConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpEqual, dl, VT, OpVT);
+ case ISD::SETO: return getBoolConstant(R!=APFloat::cmpUnordered, dl, VT,
+ OpVT);
+ case ISD::SETUO: return getBoolConstant(R==APFloat::cmpUnordered, dl, VT,
+ OpVT);
+ case ISD::SETUEQ: return getBoolConstant(R==APFloat::cmpUnordered ||
+ R==APFloat::cmpEqual, dl, VT,
+ OpVT);
+ case ISD::SETUNE: return getBoolConstant(R!=APFloat::cmpEqual, dl, VT,
+ OpVT);
+ case ISD::SETULT: return getBoolConstant(R==APFloat::cmpUnordered ||
+ R==APFloat::cmpLessThan, dl, VT,
+ OpVT);
+ case ISD::SETUGT: return getBoolConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpUnordered, dl, VT,
+ OpVT);
+ case ISD::SETULE: return getBoolConstant(R!=APFloat::cmpGreaterThan, dl,
+ VT, OpVT);
+ case ISD::SETUGE: return getBoolConstant(R!=APFloat::cmpLessThan, dl, VT,
+ OpVT);
}
} else {
// Ensure that the constant occurs on the RHS.
@@ -2297,10 +2396,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
}
- // Support big-endian targets when it becomes useful.
bool IsLE = getDataLayout().isLittleEndian();
- if (!IsLE)
- break;
// Bitcast 'small element' vector to 'large element' scalar/vector.
if ((BitWidth % SubBitWidth) == 0) {
@@ -2319,8 +2415,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
for (unsigned i = 0; i != SubScale; ++i) {
computeKnownBits(N0, Known2, SubDemandedElts.shl(i),
Depth + 1);
- Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * i);
- Known.Zero |= Known2.Zero.zext(BitWidth).shl(SubBitWidth * i);
+ unsigned Shifts = IsLE ? i : SubScale - 1 - i;
+ Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * Shifts);
+ Known.Zero |= Known2.Zero.zext(BitWidth).shl(SubBitWidth * Shifts);
}
}
@@ -2342,7 +2439,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
Known.Zero.setAllBits(); Known.One.setAllBits();
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
- unsigned Offset = (i % SubScale) * BitWidth;
+ unsigned Shifts = IsLE ? i : NumElts - 1 - i;
+ unsigned Offset = (Shifts % SubScale) * BitWidth;
Known.One &= Known2.One.lshr(Offset).trunc(BitWidth);
Known.Zero &= Known2.Zero.lshr(Offset).trunc(BitWidth);
// If we don't know any bits, early out.
@@ -2441,6 +2539,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
case ISD::SMULO:
case ISD::UMULO:
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
if (Op.getResNo() != 1)
break;
// The boolean result conforms to getBooleanContents.
@@ -2904,11 +3003,38 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
}
case ISD::SMIN:
case ISD::SMAX: {
- computeKnownBits(Op.getOperand(0), Known, DemandedElts,
- Depth + 1);
- // If we don't know any bits, early out.
- if (Known.isUnknown())
- break;
+ // If we have a clamp pattern, we know that the number of sign bits will be
+ // the minimum of the clamp min/max range.
+ bool IsMax = (Opcode == ISD::SMAX);
+ ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr;
+ if ((CstLow = isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)))
+ if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX))
+ CstHigh = isConstOrDemandedConstSplat(Op.getOperand(0).getOperand(1),
+ DemandedElts);
+ if (CstLow && CstHigh) {
+ if (!IsMax)
+ std::swap(CstLow, CstHigh);
+
+ const APInt &ValueLow = CstLow->getAPIntValue();
+ const APInt &ValueHigh = CstHigh->getAPIntValue();
+ if (ValueLow.sle(ValueHigh)) {
+ unsigned LowSignBits = ValueLow.getNumSignBits();
+ unsigned HighSignBits = ValueHigh.getNumSignBits();
+ unsigned MinSignBits = std::min(LowSignBits, HighSignBits);
+ if (ValueLow.isNegative() && ValueHigh.isNegative()) {
+ Known.One.setHighBits(MinSignBits);
+ break;
+ }
+ if (ValueLow.isNonNegative() && ValueHigh.isNonNegative()) {
+ Known.Zero.setHighBits(MinSignBits);
+ break;
+ }
+ }
+ }
+
+ // Fallback - just get the shared known bits of the operands.
+ computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ if (Known.isUnknown()) break; // Early-out
computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
Known.Zero &= Known2.Zero;
Known.One &= Known2.One;
@@ -3038,7 +3164,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
if (!DemandedElts)
return 1; // No demanded elts, better to assume we don't know anything.
- switch (Op.getOpcode()) {
+ unsigned Opcode = Op.getOpcode();
+ switch (Opcode) {
default: break;
case ISD::AssertSext:
Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
@@ -3189,7 +3316,32 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return std::min(Tmp, Tmp2);
case ISD::SMIN:
- case ISD::SMAX:
+ case ISD::SMAX: {
+ // If we have a clamp pattern, we know that the number of sign bits will be
+ // the minimum of the clamp min/max range.
+ bool IsMax = (Opcode == ISD::SMAX);
+ ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr;
+ if ((CstLow = isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)))
+ if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX))
+ CstHigh = isConstOrDemandedConstSplat(Op.getOperand(0).getOperand(1),
+ DemandedElts);
+ if (CstLow && CstHigh) {
+ if (!IsMax)
+ std::swap(CstLow, CstHigh);
+ if (CstLow->getAPIntValue().sle(CstHigh->getAPIntValue())) {
+ Tmp = CstLow->getAPIntValue().getNumSignBits();
+ Tmp2 = CstHigh->getAPIntValue().getNumSignBits();
+ return std::min(Tmp, Tmp2);
+ }
+ }
+
+ // Fallback - just get the minimum number of sign bits of the operands.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ if (Tmp == 1)
+ return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth + 1);
+ return std::min(Tmp, Tmp2);
+ }
case ISD::UMIN:
case ISD::UMAX:
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
@@ -3225,7 +3377,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
unsigned RotAmt = C->getAPIntValue().urem(VTBits);
// Handle rotate right by N like a rotate left by 32-N.
- if (Op.getOpcode() == ISD::ROTR)
+ if (Opcode == ISD::ROTR)
RotAmt = (VTBits - RotAmt) % VTBits;
// If we aren't rotating out all of the known-in sign bits, return the
@@ -3423,10 +3575,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
}
// Allow the target to implement this method for its nodes.
- if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
- Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
- Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
- Op.getOpcode() == ISD::INTRINSIC_VOID) {
+ if (Opcode >= ISD::BUILTIN_OP_END ||
+ Opcode == ISD::INTRINSIC_WO_CHAIN ||
+ Opcode == ISD::INTRINSIC_W_CHAIN ||
+ Opcode == ISD::INTRINSIC_VOID) {
unsigned NumBits =
TLI->ComputeNumSignBitsForTargetNode(Op, DemandedElts, *this, Depth);
if (NumBits > 1)
@@ -3487,17 +3639,33 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
return false;
}
-bool SelectionDAG::isKnownNeverZero(SDValue Op) const {
+bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) const {
+ assert(Op.getValueType().isFloatingPoint() &&
+ "Floating point type expected");
+
// If the value is a constant, we can obviously see if it is a zero or not.
+ // TODO: Add BuildVector support.
if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
return !C->isZero();
+ return false;
+}
+
+bool SelectionDAG::isKnownNeverZero(SDValue Op) const {
+ assert(!Op.getValueType().isFloatingPoint() &&
+ "Floating point types unsupported - use isKnownNeverZeroFloat");
+
+ // If the value is a constant, we can obviously see if it is a zero or not.
+ if (ISD::matchUnaryPredicate(
+ Op, [](ConstantSDNode *C) { return !C->isNullValue(); }))
+ return true;
// TODO: Recognize more cases here.
switch (Op.getOpcode()) {
default: break;
case ISD::OR:
- if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
- return !C->isNullValue();
+ if (isKnownNeverZero(Op.getOperand(1)) ||
+ isKnownNeverZero(Op.getOperand(0)))
+ return true;
break;
}
@@ -3517,6 +3685,8 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
return false;
}
+// FIXME: unify with llvm::haveNoCommonBitsSet.
+// FIXME: could also handle masked merge pattern (X & ~M) op (Y & M)
bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
assert(A.getValueType() == B.getValueType() &&
"Values must have the same type");
@@ -3841,11 +4011,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
else if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
- // (ext (trunx x)) -> x
+ // (ext (trunc x)) -> x
if (OpOpcode == ISD::TRUNCATE) {
SDValue OpOp = Operand.getOperand(0);
- if (OpOp.getValueType() == VT)
+ if (OpOp.getValueType() == VT) {
+ transferDbgValues(Operand, OpOp);
return OpOp;
+ }
}
break;
case ISD::TRUNCATE:
@@ -3921,10 +4093,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
case ISD::FNEG:
// -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
- if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB)
- // FIXME: FNEG has no fast-math-flags to propagate; use the FSUB's flags?
+ if ((getTarget().Options.UnsafeFPMath || Flags.hasNoSignedZeros()) &&
+ OpOpcode == ISD::FSUB)
return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1),
- Operand.getOperand(0), Operand.getNode()->getFlags());
+ Operand.getOperand(0), Flags);
if (OpOpcode == ISD::FNEG) // --X -> X
return Operand.getOperand(0);
break;
@@ -4314,24 +4486,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM:
- if (getTarget().Options.UnsafeFPMath) {
- if (Opcode == ISD::FADD) {
- // x+0 --> x
- if (N2CFP && N2CFP->getValueAPF().isZero())
- return N1;
- } else if (Opcode == ISD::FSUB) {
- // x-0 --> x
- if (N2CFP && N2CFP->getValueAPF().isZero())
- return N1;
- } else if (Opcode == ISD::FMUL) {
- // x*0 --> 0
- if (N2CFP && N2CFP->isZero())
- return N2;
- // x*1 --> x
- if (N2CFP && N2CFP->isExactlyValue(1.0))
- return N1;
- }
- }
assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
@@ -4448,12 +4602,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
}
case ISD::EXTRACT_VECTOR_ELT:
+ assert(VT.getSizeInBits() >= N1.getValueType().getScalarSizeInBits() &&
+ "The result of EXTRACT_VECTOR_ELT must be at least as wide as the \
+ element type of the vector.");
+
// EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF.
if (N1.isUndef())
return getUNDEF(VT);
// EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF
- if (N2C && N2C->getZExtValue() >= N1.getValueType().getVectorNumElements())
+ if (N2C && N2C->getAPIntValue().uge(N1.getValueType().getVectorNumElements()))
return getUNDEF(VT);
// EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is
@@ -4635,6 +4793,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
}
+ // Any FP binop with an undef operand is folded to NaN. This matches the
+ // behavior of the IR optimizer.
+ switch (Opcode) {
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ if (N1.isUndef() || N2.isUndef())
+ return getConstantFP(APFloat::getNaN(EVTToAPFloatSemantics(VT)), DL, VT);
+ }
+
// Canonicalize an UNDEF to the RHS, even over a constant.
if (N1.isUndef()) {
if (TLI->isCommutativeBinOp(Opcode)) {
@@ -4644,22 +4814,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::FP_ROUND_INREG:
case ISD::SIGN_EXTEND_INREG:
case ISD::SUB:
- case ISD::FSUB:
- case ISD::FDIV:
- case ISD::FREM:
- case ISD::SRA:
- return N1; // fold op(undef, arg2) -> undef
+ return getUNDEF(VT); // fold op(undef, arg2) -> undef
case ISD::UDIV:
case ISD::SDIV:
case ISD::UREM:
case ISD::SREM:
+ case ISD::SRA:
case ISD::SRL:
case ISD::SHL:
- if (!VT.isVector())
- return getConstant(0, DL, VT); // fold op(undef, arg2) -> 0
- // For vectors, we can't easily build an all zero vector, just return
- // the LHS.
- return N2;
+ return getConstant(0, DL, VT); // fold op(undef, arg2) -> 0
}
}
}
@@ -4681,32 +4844,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::SDIV:
case ISD::UREM:
case ISD::SREM:
- return N2; // fold op(arg1, undef) -> undef
- case ISD::FADD:
- case ISD::FSUB:
- case ISD::FMUL:
- case ISD::FDIV:
- case ISD::FREM:
- if (getTarget().Options.UnsafeFPMath)
- return N2;
- break;
- case ISD::MUL:
- case ISD::AND:
+ case ISD::SRA:
case ISD::SRL:
case ISD::SHL:
- if (!VT.isVector())
- return getConstant(0, DL, VT); // fold op(arg1, undef) -> 0
- // For vectors, we can't easily build an all zero vector, just return
- // the LHS.
- return N1;
+ return getUNDEF(VT); // fold op(arg1, undef) -> undef
+ case ISD::MUL:
+ case ISD::AND:
+ return getConstant(0, DL, VT); // fold op(arg1, undef) -> 0
case ISD::OR:
- if (!VT.isVector())
- return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT);
- // For vectors, we can't easily build an all one vector, just return
- // the LHS.
- return N1;
- case ISD::SRA:
- return N1;
+ return getAllOnesConstant(DL, VT);
}
}
@@ -4739,10 +4885,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
- SDValue N1, SDValue N2, SDValue N3) {
+ SDValue N1, SDValue N2, SDValue N3,
+ const SDNodeFlags Flags) {
// Perform various simplifications.
switch (Opcode) {
case ISD::FMA: {
+ assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
+ assert(N1.getValueType() == VT && N2.getValueType() == VT &&
+ N3.getValueType() == VT && "FMA types must match!");
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
ConstantFPSDNode *N3CFP = dyn_cast<ConstantFPSDNode>(N3);
@@ -4833,10 +4983,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops);
void *IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
+ E->intersectFlagsWith(Flags);
return SDValue(E, 0);
+ }
N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ N->setFlags(Flags);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
} else {
@@ -5107,6 +5260,31 @@ static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
return MF.getFunction().optForSize();
}
+static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
+ SmallVector<SDValue, 32> &OutChains, unsigned From,
+ unsigned To, SmallVector<SDValue, 16> &OutLoadChains,
+ SmallVector<SDValue, 16> &OutStoreChains) {
+ assert(OutLoadChains.size() && "Missing loads in memcpy inlining");
+ assert(OutStoreChains.size() && "Missing stores in memcpy inlining");
+ SmallVector<SDValue, 16> GluedLoadChains;
+ for (unsigned i = From; i < To; ++i) {
+ OutChains.push_back(OutLoadChains[i]);
+ GluedLoadChains.push_back(OutLoadChains[i]);
+ }
+
+ // Chain for all loads.
+ SDValue LoadToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ GluedLoadChains);
+
+ for (unsigned i = From; i < To; ++i) {
+ StoreSDNode *ST = dyn_cast<StoreSDNode>(OutStoreChains[i]);
+ SDValue NewStore = DAG.getTruncStore(LoadToken, dl, ST->getValue(),
+ ST->getBasePtr(), ST->getMemoryVT(),
+ ST->getMemOperand());
+ OutChains.push_back(NewStore);
+ }
+}
+
static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
uint64_t Size, unsigned Align,
@@ -5171,7 +5349,9 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
MachineMemOperand::Flags MMOFlags =
isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
- SmallVector<SDValue, 8> OutChains;
+ SmallVector<SDValue, 16> OutLoadChains;
+ SmallVector<SDValue, 16> OutStoreChains;
+ SmallVector<SDValue, 32> OutChains;
unsigned NumMemOps = MemOps.size();
uint64_t SrcOff = 0, DstOff = 0;
for (unsigned i = 0; i != NumMemOps; ++i) {
@@ -5205,11 +5385,13 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
SubSlice.Length = VTSize;
}
Value = getMemsetStringVal(VT, dl, DAG, TLI, SubSlice);
- if (Value.getNode())
+ if (Value.getNode()) {
Store = DAG.getStore(Chain, dl, Value,
DAG.getMemBasePlusOffset(Dst, DstOff, dl),
DstPtrInfo.getWithOffset(DstOff), Align,
MMOFlags);
+ OutChains.push_back(Store);
+ }
}
if (!Store.getNode()) {
@@ -5231,17 +5413,61 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
DAG.getMemBasePlusOffset(Src, SrcOff, dl),
SrcPtrInfo.getWithOffset(SrcOff), VT,
MinAlign(SrcAlign, SrcOff), SrcMMOFlags);
- OutChains.push_back(Value.getValue(1));
+ OutLoadChains.push_back(Value.getValue(1));
+
Store = DAG.getTruncStore(
Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
DstPtrInfo.getWithOffset(DstOff), VT, Align, MMOFlags);
+ OutStoreChains.push_back(Store);
}
- OutChains.push_back(Store);
SrcOff += VTSize;
DstOff += VTSize;
Size -= VTSize;
}
+ unsigned GluedLdStLimit = MaxLdStGlue == 0 ?
+ TLI.getMaxGluedStoresPerMemcpy() : MaxLdStGlue;
+ unsigned NumLdStInMemcpy = OutStoreChains.size();
+
+ if (NumLdStInMemcpy) {
+ // The memcpy may have been converted to a memset if it copies constants.
+ // In that case there are no loads, only stores, and in the absence of
+ // loads there is nothing to gang up.
+ if ((GluedLdStLimit <= 1) || !EnableMemCpyDAGOpt) {
+ // If the target does not care, just leave it as is.
+ for (unsigned i = 0; i < NumLdStInMemcpy; ++i) {
+ OutChains.push_back(OutLoadChains[i]);
+ OutChains.push_back(OutStoreChains[i]);
+ }
+ } else {
+ // Number of ld/st pairs is less than or equal to the limit set by the target.
+ if (NumLdStInMemcpy <= GluedLdStLimit) {
+ chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, 0,
+ NumLdStInMemcpy, OutLoadChains,
+ OutStoreChains);
+ } else {
+ unsigned NumberLdChain = NumLdStInMemcpy / GluedLdStLimit;
+ unsigned RemainingLdStInMemcpy = NumLdStInMemcpy % GluedLdStLimit;
+ unsigned GlueIter = 0;
+
+ for (unsigned cnt = 0; cnt < NumberLdChain; ++cnt) {
+ unsigned IndexFrom = NumLdStInMemcpy - GlueIter - GluedLdStLimit;
+ unsigned IndexTo = NumLdStInMemcpy - GlueIter;
+
+ chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, IndexFrom, IndexTo,
+ OutLoadChains, OutStoreChains);
+ GlueIter += GluedLdStLimit;
+ }
+
+ // Residual ld/st.
+ if (RemainingLdStInMemcpy) {
+ chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, 0,
+ RemainingLdStInMemcpy, OutLoadChains,
+ OutStoreChains);
+ }
+ }
+ }
+ }
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
@@ -5334,7 +5560,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
-/// \brief Lower the call to 'memset' intrinsic function into a series of store
+/// Lower the call to 'memset' intrinsic function into a series of store
/// operations.
///
/// \param DAG Selection DAG where lowered code is placed.
@@ -5518,6 +5744,47 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
return CallResult.second;
}
+SDValue SelectionDAG::getAtomicMemcpy(SDValue Chain, const SDLoc &dl,
+ SDValue Dst, unsigned DstAlign,
+ SDValue Src, unsigned SrcAlign,
+ SDValue Size, Type *SizeTy,
+ unsigned ElemSz, bool isTailCall,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+ Entry.Node = Dst;
+ Args.push_back(Entry);
+
+ Entry.Node = Src;
+ Args.push_back(Entry);
+
+ Entry.Ty = SizeTy;
+ Entry.Node = Size;
+ Args.push_back(Entry);
+
+ RTLIB::Libcall LibraryCall =
+ RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElemSz);
+ if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
+ report_fatal_error("Unsupported element size");
+
+ TargetLowering::CallLoweringInfo CLI(*this);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setLibCallee(TLI->getLibcallCallingConv(LibraryCall),
+ Type::getVoidTy(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(LibraryCall),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
+ .setDiscardResult()
+ .setTailCall(isTailCall);
+
+ std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI);
+ return CallResult.second;
+}
+
SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
SDValue Src, SDValue Size, unsigned Align,
bool isVol, bool isTailCall,
@@ -5579,6 +5846,47 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
return CallResult.second;
}
+SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl,
+ SDValue Dst, unsigned DstAlign,
+ SDValue Src, unsigned SrcAlign,
+ SDValue Size, Type *SizeTy,
+ unsigned ElemSz, bool isTailCall,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+ Entry.Node = Dst;
+ Args.push_back(Entry);
+
+ Entry.Node = Src;
+ Args.push_back(Entry);
+
+ Entry.Ty = SizeTy;
+ Entry.Node = Size;
+ Args.push_back(Entry);
+
+ RTLIB::Libcall LibraryCall =
+ RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElemSz);
+ if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
+ report_fatal_error("Unsupported element size");
+
+ TargetLowering::CallLoweringInfo CLI(*this);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setLibCallee(TLI->getLibcallCallingConv(LibraryCall),
+ Type::getVoidTy(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(LibraryCall),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
+ .setDiscardResult()
+ .setTailCall(isTailCall);
+
+ std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI);
+ return CallResult.second;
+}
+
SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
SDValue Src, SDValue Size, unsigned Align,
bool isVol, bool isTailCall,
@@ -5641,6 +5949,46 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
return CallResult.second;
}
+SDValue SelectionDAG::getAtomicMemset(SDValue Chain, const SDLoc &dl,
+ SDValue Dst, unsigned DstAlign,
+ SDValue Value, SDValue Size, Type *SizeTy,
+ unsigned ElemSz, bool isTailCall,
+ MachinePointerInfo DstPtrInfo) {
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+ Entry.Node = Dst;
+ Args.push_back(Entry);
+
+ Entry.Ty = Type::getInt8Ty(*getContext());
+ Entry.Node = Value;
+ Args.push_back(Entry);
+
+ Entry.Ty = SizeTy;
+ Entry.Node = Size;
+ Args.push_back(Entry);
+
+ RTLIB::Libcall LibraryCall =
+ RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElemSz);
+ if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
+ report_fatal_error("Unsupported element size");
+
+ TargetLowering::CallLoweringInfo CLI(*this);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setLibCallee(TLI->getLibcallCallingConv(LibraryCall),
+ Type::getVoidTy(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(LibraryCall),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
+ .setDiscardResult()
+ .setTailCall(isTailCall);
+
+ std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI);
+ return CallResult.second;
+}
+
SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
SDVTList VTList, ArrayRef<SDValue> Ops,
MachineMemOperand *MMO) {
@@ -5736,6 +6084,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
assert((Opcode == ISD::ATOMIC_LOAD_ADD ||
Opcode == ISD::ATOMIC_LOAD_SUB ||
Opcode == ISD::ATOMIC_LOAD_AND ||
+ Opcode == ISD::ATOMIC_LOAD_CLR ||
Opcode == ISD::ATOMIC_LOAD_OR ||
Opcode == ISD::ATOMIC_LOAD_XOR ||
Opcode == ISD::ATOMIC_LOAD_NAND ||
@@ -6207,7 +6556,7 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops,
MachineMemOperand *MMO) {
- assert(Ops.size() == 5 && "Incompatible number of operands");
+ assert(Ops.size() == 6 && "Incompatible number of operands");
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops);
@@ -6233,6 +6582,9 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
assert(N->getIndex().getValueType().getVectorNumElements() ==
N->getValueType(0).getVectorNumElements() &&
"Vector width mismatch between index and data");
+ assert(isa<ConstantSDNode>(N->getScale()) &&
+ cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() &&
+ "Scale should be a constant power of 2");
CSEMap.InsertNode(N, IP);
InsertNode(N);
@@ -6244,7 +6596,7 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops,
MachineMemOperand *MMO) {
- assert(Ops.size() == 5 && "Incompatible number of operands");
+ assert(Ops.size() == 6 && "Incompatible number of operands");
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops);
@@ -6267,6 +6619,9 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
assert(N->getIndex().getValueType().getVectorNumElements() ==
N->getValue().getValueType().getVectorNumElements() &&
"Vector width mismatch between index and data");
+ assert(isa<ConstantSDNode>(N->getScale()) &&
+ cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() &&
+ "Scale should be a constant power of 2");
CSEMap.InsertNode(N, IP);
InsertNode(N);
@@ -6558,6 +6913,7 @@ SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) {
// Now we update the operands.
N->OperandList[0].set(Op);
+ updateDivergence(N);
// If this gets put into a CSE map, add it.
if (InsertPos) CSEMap.InsertNode(N, InsertPos);
return N;
@@ -6586,6 +6942,7 @@ SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) {
if (N->OperandList[1] != Op2)
N->OperandList[1].set(Op2);
+ updateDivergence(N);
// If this gets put into a CSE map, add it.
if (InsertPos) CSEMap.InsertNode(N, InsertPos);
return N;
@@ -6636,6 +6993,7 @@ UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) {
if (N->OperandList[i] != Ops[i])
N->OperandList[i].set(Ops[i]);
+ updateDivergence(N);
// If this gets put into a CSE map, add it.
if (InsertPos) CSEMap.InsertNode(N, InsertPos);
return N;
@@ -7061,11 +7419,24 @@ SDDbgValue *SelectionDAG::getConstantDbgValue(DIVariable *Var,
/// FrameIndex
SDDbgValue *SelectionDAG::getFrameIndexDbgValue(DIVariable *Var,
DIExpression *Expr, unsigned FI,
+ bool IsIndirect,
const DebugLoc &DL,
unsigned O) {
assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
- return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, FI, DL, O);
+ return new (DbgInfo->getAlloc())
+ SDDbgValue(Var, Expr, FI, IsIndirect, DL, O, SDDbgValue::FRAMEIX);
+}
+
+/// VReg
+SDDbgValue *SelectionDAG::getVRegDbgValue(DIVariable *Var,
+ DIExpression *Expr,
+ unsigned VReg, bool IsIndirect,
+ const DebugLoc &DL, unsigned O) {
+ assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ return new (DbgInfo->getAlloc())
+ SDDbgValue(Var, Expr, VReg, IsIndirect, DL, O, SDDbgValue::VREG);
}
void SelectionDAG::transferDbgValues(SDValue From, SDValue To,
@@ -7155,8 +7526,9 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
DV->isIndirect(), DV->getDebugLoc(), DV->getOrder());
ClonedDVs.push_back(Clone);
DV->setIsInvalidated();
- DEBUG(dbgs() << "SALVAGE: Rewriting"; N0.getNode()->dumprFull(this);
- dbgs() << " into " << *DIExpr << '\n');
+ LLVM_DEBUG(dbgs() << "SALVAGE: Rewriting";
+ N0.getNode()->dumprFull(this);
+ dbgs() << " into " << *DIExpr << '\n');
}
}
}
@@ -7165,6 +7537,14 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
AddDbgValue(Dbg, Dbg->getSDNode(), false);
}
+/// Creates a SDDbgLabel node.
+SDDbgLabel *SelectionDAG::getDbgLabel(DILabel *Label,
+ const DebugLoc &DL, unsigned O) {
+ assert(cast<DILabel>(Label)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ return new (DbgInfo->getAlloc()) SDDbgLabel(Label, DL, O);
+}
+
namespace {
/// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node
@@ -7227,8 +7607,9 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) {
SDUse &Use = UI.getUse();
++UI;
Use.set(To);
+ if (To->isDivergent() != From->isDivergent())
+ updateDivergence(User);
} while (UI != UE && *UI == User);
-
// Now that we have modified User, add it back to the CSE maps. If it
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User);
@@ -7282,6 +7663,8 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) {
SDUse &Use = UI.getUse();
++UI;
Use.setNode(To);
+ if (To->isDivergent() != From->isDivergent())
+ updateDivergence(User);
} while (UI != UE && *UI == User);
// Now that we have modified User, add it back to the CSE maps. If it
@@ -7326,8 +7709,9 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) {
const SDValue &ToOp = To[Use.getResNo()];
++UI;
Use.set(ToOp);
+ if (To->getNode()->isDivergent() != From->isDivergent())
+ updateDivergence(User);
} while (UI != UE && *UI == User);
-
// Now that we have modified User, add it back to the CSE maps. If it
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User);
@@ -7385,8 +7769,9 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){
++UI;
Use.set(To);
+ if (To->isDivergent() != From->isDivergent())
+ updateDivergence(User);
} while (UI != UE && *UI == User);
-
// We are iterating over all uses of the From node, so if a use
// doesn't use the specific value, no changes are made.
if (!UserRemovedFromCSEMaps)
@@ -7419,6 +7804,72 @@ namespace {
} // end anonymous namespace
+void SelectionDAG::updateDivergence(SDNode * N)
+{
+ if (TLI->isSDNodeAlwaysUniform(N))
+ return;
+ bool IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, DA);
+ for (auto &Op : N->ops()) {
+ if (Op.Val.getValueType() != MVT::Other)
+ IsDivergent |= Op.getNode()->isDivergent();
+ }
+ if (N->SDNodeBits.IsDivergent != IsDivergent) {
+ N->SDNodeBits.IsDivergent = IsDivergent;
+ for (auto U : N->uses()) {
+ updateDivergence(U);
+ }
+ }
+}
+
+
+void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode*>& Order) {
+ DenseMap<SDNode *, unsigned> Degree;
+ Order.reserve(AllNodes.size());
+ for (auto & N : allnodes()) {
+ unsigned NOps = N.getNumOperands();
+ Degree[&N] = NOps;
+ if (0 == NOps)
+ Order.push_back(&N);
+ }
+ for (std::vector<SDNode *>::iterator I = Order.begin();
+ I!=Order.end();++I) {
+ SDNode * N = *I;
+ for (auto U : N->uses()) {
+ unsigned &UnsortedOps = Degree[U];
+ if (0 == --UnsortedOps)
+ Order.push_back(U);
+ }
+ }
+}
+
+void SelectionDAG::VerifyDAGDiverence()
+{
+ std::vector<SDNode*> TopoOrder;
+ CreateTopologicalOrder(TopoOrder);
+ const TargetLowering &TLI = getTargetLoweringInfo();
+ DenseMap<const SDNode *, bool> DivergenceMap;
+ for (auto &N : allnodes()) {
+ DivergenceMap[&N] = false;
+ }
+ for (auto N : TopoOrder) {
+ bool IsDivergent = DivergenceMap[N];
+ bool IsSDNodeDivergent = TLI.isSDNodeSourceOfDivergence(N, FLI, DA);
+ for (auto &Op : N->ops()) {
+ if (Op.Val.getValueType() != MVT::Other)
+ IsSDNodeDivergent |= DivergenceMap[Op.getNode()];
+ }
+ if (!IsDivergent && IsSDNodeDivergent && !TLI.isSDNodeAlwaysUniform(N)) {
+ DivergenceMap[N] = true;
+ }
+ }
+ for (auto &N : allnodes()) {
+ (void)N;
+ assert(DivergenceMap[&N] == N.isDivergent() &&
+ "Divergence bit inconsistency detected\n");
+ }
+}
+
+
/// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving
/// uses of other values produced by From.getNode() alone. The same value
/// may appear in both the From and To list. The Deleted vector is
@@ -7450,7 +7901,7 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
}
// Sort the uses, so that all the uses from a given User are together.
- std::sort(Uses.begin(), Uses.end());
+ llvm::sort(Uses.begin(), Uses.end());
for (unsigned UseIndex = 0, UseIndexEnd = Uses.size();
UseIndex != UseIndexEnd; ) {
@@ -7579,6 +8030,10 @@ void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) {
DbgInfo->add(DB, SD, isParameter);
}
+void SelectionDAG::AddDbgLabel(SDDbgLabel *DB) {
+ DbgInfo->add(DB);
+}
+
SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
SDValue NewMemOp) {
assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node");
@@ -7963,8 +8418,8 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
const GlobalValue *GV;
int64_t GVOffset = 0;
if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
- unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType());
- KnownBits Known(PtrWidth);
+ unsigned IdxWidth = getDataLayout().getIndexTypeSizeInBits(GV->getType());
+ KnownBits Known(IdxWidth);
llvm::computeKnownBits(GV, Known, getDataLayout());
unsigned AlignBits = Known.countMinTrailingZeros();
unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0;
@@ -8198,7 +8653,7 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
return true;
}
-// \brief Returns the SDNode if it is a constant integer BuildVector
+// Returns the SDNode if it is a constant integer BuildVector
// or constant integer.
SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) {
if (isa<ConstantSDNode>(N))
@@ -8224,6 +8679,26 @@ SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) {
return nullptr;
}
+void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) {
+ assert(!Node->OperandList && "Node already has operands");
+ SDUse *Ops = OperandRecycler.allocate(
+ ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator);
+
+ bool IsDivergent = false;
+ for (unsigned I = 0; I != Vals.size(); ++I) {
+ Ops[I].setUser(Node);
+ Ops[I].setInitial(Vals[I]);
+ if (Ops[I].Val.getValueType() != MVT::Other) // Skip Chain. It does not carry divergence.
+ IsDivergent = IsDivergent || Ops[I].getNode()->isDivergent();
+ }
+ Node->NumOperands = Vals.size();
+ Node->OperandList = Ops;
+ IsDivergent |= TLI->isSDNodeSourceOfDivergence(Node, FLI, DA);
+ if (!TLI->isSDNodeAlwaysUniform(Node))
+ Node->SDNodeBits.IsDivergent = IsDivergent;
+ checkForCycles(Node);
+}
+
#ifndef NDEBUG
static void checkForCyclesHelper(const SDNode *N,
SmallPtrSetImpl<const SDNode*> &Visited,
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index da1574f60524..c859f16e74fe 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -99,16 +99,43 @@ BaseIndexOffset BaseIndexOffset::match(LSBaseSDNode *N,
}
// Consume constant adds & ors with appropriate masking.
- while (Base->getOpcode() == ISD::ADD || Base->getOpcode() == ISD::OR) {
- if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1))) {
+ while (true) {
+ switch (Base->getOpcode()) {
+ case ISD::OR:
// Only consider ORs which act as adds.
- if (Base->getOpcode() == ISD::OR &&
- !DAG.MaskedValueIsZero(Base->getOperand(0), C->getAPIntValue()))
- break;
- Offset += C->getSExtValue();
- Base = Base->getOperand(0);
- continue;
+ if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1)))
+ if (DAG.MaskedValueIsZero(Base->getOperand(0), C->getAPIntValue())) {
+ Offset += C->getSExtValue();
+ Base = Base->getOperand(0);
+ continue;
+ }
+ break;
+ case ISD::ADD:
+ if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1))) {
+ Offset += C->getSExtValue();
+ Base = Base->getOperand(0);
+ continue;
+ }
+ break;
+ case ISD::LOAD:
+ case ISD::STORE: {
+ auto *LSBase = cast<LSBaseSDNode>(Base.getNode());
+ unsigned int IndexResNo = (Base->getOpcode() == ISD::LOAD) ? 1 : 0;
+ if (LSBase->isIndexed() && Base.getResNo() == IndexResNo)
+ if (auto *C = dyn_cast<ConstantSDNode>(LSBase->getOffset())) {
+ auto Off = C->getSExtValue();
+ if (LSBase->getAddressingMode() == ISD::PRE_DEC ||
+ LSBase->getAddressingMode() == ISD::POST_DEC)
+ Offset -= Off;
+ else
+ Offset += Off;
+ Base = LSBase->getBasePtr();
+ continue;
+ }
+ break;
+ }
}
+ // If we get here, break out of the loop.
break;
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 68bbd62e1321..1aa8df29af3b 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "SelectionDAGBuilder.h"
+#include "SDNodeDbgValue.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
@@ -49,7 +50,6 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -102,6 +102,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
@@ -777,8 +778,8 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
EVT ValueVT = ValueVTs[Value];
unsigned NumRegs = RegCount[Value];
MVT RegisterVT = IsABIMangled
- ? TLI.getRegisterTypeForCallingConv(RegVTs[Value])
- : RegVTs[Value];
+ ? TLI.getRegisterTypeForCallingConv(*DAG.getContext(), RegVTs[Value])
+ : RegVTs[Value];
Parts.resize(NumRegs);
for (unsigned i = 0; i != NumRegs; ++i) {
@@ -818,32 +819,15 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
// FIXME: We capture more information than the dag can represent. For
// now, just use the tightest assertzext/assertsext possible.
- bool isSExt = true;
+ bool isSExt;
EVT FromVT(MVT::Other);
- if (NumSignBits == RegSize) {
- isSExt = true; // ASSERT SEXT 1
- FromVT = MVT::i1;
- } else if (NumZeroBits >= RegSize - 1) {
- isSExt = false; // ASSERT ZEXT 1
- FromVT = MVT::i1;
- } else if (NumSignBits > RegSize - 8) {
- isSExt = true; // ASSERT SEXT 8
- FromVT = MVT::i8;
- } else if (NumZeroBits >= RegSize - 8) {
- isSExt = false; // ASSERT ZEXT 8
- FromVT = MVT::i8;
- } else if (NumSignBits > RegSize - 16) {
- isSExt = true; // ASSERT SEXT 16
- FromVT = MVT::i16;
- } else if (NumZeroBits >= RegSize - 16) {
- isSExt = false; // ASSERT ZEXT 16
- FromVT = MVT::i16;
- } else if (NumSignBits > RegSize - 32) {
- isSExt = true; // ASSERT SEXT 32
- FromVT = MVT::i32;
- } else if (NumZeroBits >= RegSize - 32) {
- isSExt = false; // ASSERT ZEXT 32
- FromVT = MVT::i32;
+ if (NumZeroBits) {
+ FromVT = EVT::getIntegerVT(*DAG.getContext(), RegSize - NumZeroBits);
+ isSExt = false;
+ } else if (NumSignBits > 1) {
+ FromVT =
+ EVT::getIntegerVT(*DAG.getContext(), RegSize - NumSignBits + 1);
+ isSExt = true;
} else {
continue;
}
@@ -876,8 +860,8 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
unsigned NumParts = RegCount[Value];
MVT RegisterVT = IsABIMangled
- ? TLI.getRegisterTypeForCallingConv(RegVTs[Value])
- : RegVTs[Value];
+ ? TLI.getRegisterTypeForCallingConv(*DAG.getContext(), RegVTs[Value])
+ : RegVTs[Value];
if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
ExtendKind = ISD::ZERO_EXTEND;
@@ -970,6 +954,20 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
}
}
+SmallVector<std::pair<unsigned, unsigned>, 4>
+RegsForValue::getRegsAndSizes() const {
+ SmallVector<std::pair<unsigned, unsigned>, 4> OutVec;
+ unsigned I = 0;
+ for (auto CountAndVT : zip_first(RegCount, RegVTs)) {
+ unsigned RegCount = std::get<0>(CountAndVT);
+ MVT RegisterVT = std::get<1>(CountAndVT);
+ unsigned RegisterSize = RegisterVT.getSizeInBits();
+ for (unsigned E = I + RegCount; I != E; ++I)
+ OutVec.push_back(std::make_pair(Regs[I], RegisterSize));
+ }
+ return OutVec;
+}
+
void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
const TargetLibraryInfo *li) {
AA = aa;
@@ -1054,6 +1052,22 @@ void SelectionDAGBuilder::visit(const Instruction &I) {
visit(I.getOpcode(), I);
+ if (auto *FPMO = dyn_cast<FPMathOperator>(&I)) {
+ // Propagate the fast-math-flags of this IR instruction to the DAG node that
+ // maps to this instruction.
+ // TODO: We could handle all flags (nsw, etc) here.
+ // TODO: If an IR instruction maps to >1 node, only the final node will have
+ // flags set.
+ if (SDNode *Node = getNodeForIRValue(&I)) {
+ SDNodeFlags IncomingFlags;
+ IncomingFlags.copyFMF(*FPMO);
+ if (!Node->getFlags().isDefined())
+ Node->setFlags(IncomingFlags);
+ else
+ Node->intersectFlagsWith(IncomingFlags);
+ }
+ }
+
if (!isa<TerminatorInst>(&I) && !HasTailCall &&
!isStatepoint(&I)) // statepoints handle their exports internally
CopyToExportRegsIfNeeded(&I);
@@ -1077,14 +1091,39 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
}
}
+void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable,
+ const DIExpression *Expr) {
+ auto isMatchingDbgValue = [&](DanglingDebugInfo &DDI) {
+ const DbgValueInst *DI = DDI.getDI();
+ DIVariable *DanglingVariable = DI->getVariable();
+ DIExpression *DanglingExpr = DI->getExpression();
+ if (DanglingVariable == Variable && Expr->fragmentsOverlap(DanglingExpr)) {
+ LLVM_DEBUG(dbgs() << "Dropping dangling debug info for " << *DI << "\n");
+ return true;
+ }
+ return false;
+ };
+
+ for (auto &DDIMI : DanglingDebugInfoMap) {
+ DanglingDebugInfoVector &DDIV = DDIMI.second;
+ DDIV.erase(remove_if(DDIV, isMatchingDbgValue), DDIV.end());
+ }
+}
+
// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
// generate the debug data structures now that we've seen its definition.
void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
SDValue Val) {
- DanglingDebugInfo &DDI = DanglingDebugInfoMap[V];
- if (DDI.getDI()) {
+ auto DanglingDbgInfoIt = DanglingDebugInfoMap.find(V);
+ if (DanglingDbgInfoIt == DanglingDebugInfoMap.end())
+ return;
+
+ DanglingDebugInfoVector &DDIV = DanglingDbgInfoIt->second;
+ for (auto &DDI : DDIV) {
const DbgValueInst *DI = DDI.getDI();
+ assert(DI && "Ill-formed DanglingDebugInfo");
DebugLoc dl = DDI.getdl();
+ unsigned ValSDNodeOrder = Val.getNode()->getIROrder();
unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
DILocalVariable *Variable = DI->getVariable();
DIExpression *Expr = DI->getExpression();
@@ -1093,13 +1132,26 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
SDDbgValue *SDV;
if (Val.getNode()) {
if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) {
- SDV = getDbgValue(Val, Variable, Expr, dl, DbgSDNodeOrder);
+ LLVM_DEBUG(dbgs() << "Resolve dangling debug info [order="
+ << DbgSDNodeOrder << "] for:\n " << *DI << "\n");
+ LLVM_DEBUG(dbgs() << " By mapping to:\n "; Val.dump());
+ // Increase the SDNodeOrder for the DbgValue here to make sure it is
+ // inserted after the definition of Val when emitting the instructions
+ // after ISel. An alternative could be to teach
+ // ScheduleDAGSDNodes::EmitSchedule to delay the insertion properly.
+ LLVM_DEBUG(if (ValSDNodeOrder > DbgSDNodeOrder) dbgs()
+ << "changing SDNodeOrder from " << DbgSDNodeOrder << " to "
+ << ValSDNodeOrder << "\n");
+ SDV = getDbgValue(Val, Variable, Expr, dl,
+ std::max(DbgSDNodeOrder, ValSDNodeOrder));
DAG.AddDbgValue(SDV, Val.getNode(), false);
- }
+ } else
+ LLVM_DEBUG(dbgs() << "Resolved dangling debug info for " << *DI
+ << "in EmitFuncArgumentDbgValue\n");
} else
- DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
- DanglingDebugInfoMap[V] = DanglingDebugInfo();
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
}
+ DDIV.clear();
}
/// getCopyFromRegs - If there was virtual register allocated for the value V
@@ -1315,12 +1367,18 @@ void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) {
auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX;
bool IsCoreCLR = Pers == EHPersonality::CoreCLR;
+ bool IsSEH = isAsynchronousEHPersonality(Pers);
+ bool IsWasmCXX = Pers == EHPersonality::Wasm_CXX;
MachineBasicBlock *CatchPadMBB = FuncInfo.MBB;
+ if (!IsSEH)
+ CatchPadMBB->setIsEHScopeEntry();
// In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues.
if (IsMSVCCXX || IsCoreCLR)
CatchPadMBB->setIsEHFuncletEntry();
-
- DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other, getControlRoot()));
+ // Wasm does not need catchpads anymore
+ if (!IsWasmCXX)
+ DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other,
+ getControlRoot()));
}
void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
@@ -1363,7 +1421,8 @@ void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
// Don't emit any special code for the cleanuppad instruction. It just marks
- // the start of a funclet.
+ // the start of an EH scope/funclet.
+ FuncInfo.MBB->setIsEHScopeEntry();
FuncInfo.MBB->setIsEHFuncletEntry();
FuncInfo.MBB->setIsCleanupFuncletEntry();
}
@@ -1385,6 +1444,7 @@ static void findUnwindDestinations(
classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
+ bool IsSEH = isAsynchronousEHPersonality(Personality);
while (EHPadBB) {
const Instruction *Pad = EHPadBB->getFirstNonPHI();
@@ -1397,6 +1457,7 @@ static void findUnwindDestinations(
// Stop on cleanup pads. Cleanups are always funclet entries for all known
// personalities.
UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
+ UnwindDests.back().first->setIsEHScopeEntry();
UnwindDests.back().first->setIsEHFuncletEntry();
break;
} else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
@@ -1406,6 +1467,8 @@ static void findUnwindDestinations(
// For MSVC++ and the CLR, catchblocks are funclets and need prologues.
if (IsMSVCCXX || IsCoreCLR)
UnwindDests.back().first->setIsEHFuncletEntry();
+ if (!IsSEH)
+ UnwindDests.back().first->setIsEHScopeEntry();
}
NewEHPadBB = CatchSwitch->getUnwindDest();
} else {
@@ -1653,8 +1716,7 @@ SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src,
if (!BPI) {
// If BPI is not available, set the default probability as 1 / N, where N is
// the number of successors.
- auto SuccSize = std::max<uint32_t>(
- std::distance(succ_begin(SrcBB), succ_end(SrcBB)), 1);
+ auto SuccSize = std::max<uint32_t>(succ_size(SrcBB), 1);
return BranchProbability(1, SuccSize);
}
return BPI->getEdgeProbability(SrcBB, DstBB);
@@ -2489,8 +2551,8 @@ void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) {
assert(CC.Low == CC.High && "Input clusters must be single-case");
#endif
- std::sort(Clusters.begin(), Clusters.end(),
- [](const CaseCluster &a, const CaseCluster &b) {
+ llvm::sort(Clusters.begin(), Clusters.end(),
+ [](const CaseCluster &a, const CaseCluster &b) {
return a.Low->getValue().slt(b.Low->getValue());
});
@@ -2551,9 +2613,23 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
}
void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
- if (DAG.getTarget().Options.TrapUnreachable)
- DAG.setRoot(
- DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
+ if (!DAG.getTarget().Options.TrapUnreachable)
+ return;
+
+ // We may be able to ignore unreachable behind a noreturn call.
+ if (DAG.getTarget().Options.NoTrapAfterNoreturn) {
+ const BasicBlock &BB = *I.getParent();
+ if (&I != &BB.front()) {
+ BasicBlock::const_iterator PredI =
+ std::prev(BasicBlock::const_iterator(&I));
+ if (const CallInst *Call = dyn_cast<CallInst>(&*PredI)) {
+ if (Call->doesNotReturn())
+ return;
+ }
+ }
+ }
+
+ DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
}
void SelectionDAGBuilder::visitFSub(const User &I) {
@@ -2597,6 +2673,10 @@ static bool isVectorReductionOp(const User *I) {
}
unsigned ElemNum = Inst->getType()->getVectorNumElements();
+ // Ensure the reduction size is a power of 2.
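+ // (The shuffle-based reduction pattern matched below halves the element
+ // count on each step, so only power-of-two widths form a complete tree.)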
+ if (!isPowerOf2_32(ElemNum))
+ return false;
+
unsigned ElemNumToReduce = ElemNum;
// Do DFS search on the def-use chain from the given instruction. We only
@@ -2682,7 +2762,7 @@ static bool isVectorReductionOp(const User *I) {
return false;
const ConstantInt *Val = dyn_cast<ConstantInt>(U->getOperand(1));
- if (!Val || Val->getZExtValue() != 0)
+ if (!Val || !Val->isZero())
return false;
ReduxExtracted = true;
@@ -2693,45 +2773,23 @@ static bool isVectorReductionOp(const User *I) {
return ReduxExtracted;
}
-void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
- SDValue Op1 = getValue(I.getOperand(0));
- SDValue Op2 = getValue(I.getOperand(1));
-
- bool nuw = false;
- bool nsw = false;
- bool exact = false;
- bool vec_redux = false;
- FastMathFlags FMF;
-
- if (const OverflowingBinaryOperator *OFBinOp =
- dyn_cast<const OverflowingBinaryOperator>(&I)) {
- nuw = OFBinOp->hasNoUnsignedWrap();
- nsw = OFBinOp->hasNoSignedWrap();
+void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) {
+ SDNodeFlags Flags;
+ if (auto *OFBinOp = dyn_cast<OverflowingBinaryOperator>(&I)) {
+ Flags.setNoSignedWrap(OFBinOp->hasNoSignedWrap());
+ Flags.setNoUnsignedWrap(OFBinOp->hasNoUnsignedWrap());
+ }
+ if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I)) {
+ Flags.setExact(ExactOp->isExact());
}
- if (const PossiblyExactOperator *ExactOp =
- dyn_cast<const PossiblyExactOperator>(&I))
- exact = ExactOp->isExact();
- if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(&I))
- FMF = FPOp->getFastMathFlags();
-
if (isVectorReductionOp(&I)) {
- vec_redux = true;
- DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n");
+ Flags.setVectorReduction(true);
+ LLVM_DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n");
}
- SDNodeFlags Flags;
- Flags.setExact(exact);
- Flags.setNoSignedWrap(nsw);
- Flags.setNoUnsignedWrap(nuw);
- Flags.setVectorReduction(vec_redux);
- Flags.setAllowReciprocal(FMF.allowReciprocal());
- Flags.setAllowContract(FMF.allowContract());
- Flags.setNoInfs(FMF.noInfs());
- Flags.setNoNaNs(FMF.noNaNs());
- Flags.setNoSignedZeros(FMF.noSignedZeros());
- Flags.setUnsafeAlgebra(FMF.isFast());
-
- SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(),
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+ SDValue BinNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(),
Op1, Op2, Flags);
setValue(&I, BinNodeValue);
}
@@ -2823,13 +2881,12 @@ void SelectionDAGBuilder::visitFCmp(const User &I) {
predicate = FCmpInst::Predicate(FC->getPredicate());
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
- ISD::CondCode Condition = getFCmpCondCode(predicate);
- // FIXME: Fcmp instructions have fast-math-flags in IR, so we should use them.
- // FIXME: We should propagate the fast-math-flags to the DAG node itself for
- // further optimization, but currently FMF is only applicable to binary nodes.
- if (TM.Options.NoNaNsFPMath)
+ ISD::CondCode Condition = getFCmpCondCode(predicate);
+ auto *FPMO = dyn_cast<FPMathOperator>(&I);
+ if ((FPMO && FPMO->hasNoNaNs()) || TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
+
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition));
@@ -3424,10 +3481,9 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
DAG.getConstant(Offset, dl, N.getValueType()), Flags);
}
} else {
- MVT PtrTy =
- DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout(), AS);
- unsigned PtrSize = PtrTy.getSizeInBits();
- APInt ElementSize(PtrSize, DL->getTypeAllocSize(GTI.getIndexedType()));
+ unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS);
+ MVT IdxTy = MVT::getIntegerVT(IdxSize);
+ APInt ElementSize(IdxSize, DL->getTypeAllocSize(GTI.getIndexedType()));
// If this is a scalar constant or a splat vector of constants,
// handle it quickly.
@@ -3439,11 +3495,11 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
if (CI) {
if (CI->isZero())
continue;
- APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize);
+ APInt Offs = ElementSize * CI->getValue().sextOrTrunc(IdxSize);
LLVMContext &Context = *DAG.getContext();
SDValue OffsVal = VectorWidth ?
- DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, PtrTy, VectorWidth)) :
- DAG.getConstant(Offs, dl, PtrTy);
+ DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorWidth)) :
+ DAG.getConstant(Offs, dl, IdxTy);
+ // In an inbounds GEP with an offset that is nonnegative even when
// interpreted as signed, assume there is no unsigned overflow.
@@ -3867,7 +3923,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
// extract the splat value and use it as a uniform base.
// In all other cases the function returns 'false'.
static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
- SelectionDAGBuilder* SDB) {
+ SDValue &Scale, SelectionDAGBuilder* SDB) {
SelectionDAG& DAG = SDB->DAG;
LLVMContext &Context = *DAG.getContext();
@@ -3897,6 +3953,10 @@ static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal))
return false;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const DataLayout &DL = DAG.getDataLayout();
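+ // The index scale is the allocation size in bytes of the GEP's result
+ // element type.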
+ Scale = DAG.getTargetConstant(DL.getTypeAllocSize(GEP->getResultElementType()),
+ SDB->getCurSDLoc(), TLI.getPointerTy(DL));
Base = SDB->getValue(Ptr);
Index = SDB->getValue(IndexVal);
@@ -3926,8 +3986,9 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
SDValue Base;
SDValue Index;
+ SDValue Scale;
const Value *BasePtr = Ptr;
- bool UniformBase = getUniformBase(BasePtr, Base, Index, this);
+ bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this);
const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr;
MachineMemOperand *MMO = DAG.getMachineFunction().
@@ -3935,10 +3996,11 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
MachineMemOperand::MOStore, VT.getStoreSize(),
Alignment, AAInfo);
if (!UniformBase) {
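+ // No uniform base: use a zero base, the original pointer operand as the
+ // per-lane index, and a scale of 1.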
- Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
+ Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(Ptr);
+ Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
}
- SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index };
+ SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index, Scale };
SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl,
Ops, MMO);
DAG.setRoot(Scatter);
@@ -3997,10 +4059,8 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO,
ISD::NON_EXTLOAD, IsExpanding);
- if (AddToChain) {
- SDValue OutChain = Load.getValue(1);
- DAG.setRoot(OutChain);
- }
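+ // Queue the output chain as a pending load instead of replacing the DAG
+ // root, so independent loads are not serialized behind this one.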
+ if (AddToChain)
+ PendingLoads.push_back(Load.getValue(1));
setValue(&I, Load);
}
@@ -4025,8 +4085,9 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
SDValue Root = DAG.getRoot();
SDValue Base;
SDValue Index;
+ SDValue Scale;
const Value *BasePtr = Ptr;
- bool UniformBase = getUniformBase(BasePtr, Base, Index, this);
+ bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this);
bool ConstantMemory = false;
if (UniformBase &&
AA && AA->pointsToConstantMemory(MemoryLocation(
@@ -4044,10 +4105,11 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
Alignment, AAInfo, Ranges);
if (!UniformBase) {
- Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
+ Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(Ptr);
+ Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
}
- SDValue Ops[] = { Root, Src0, Mask, Base, Index };
+ SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale };
SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,
Ops, MMO);
@@ -4868,26 +4930,18 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
const auto &TLI = DAG.getTargetLoweringInfo();
RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second,
V->getType(), isABIRegCopy(V));
- unsigned NumRegs =
- std::accumulate(RFV.RegCount.begin(), RFV.RegCount.end(), 0);
- if (NumRegs > 1) {
- unsigned I = 0;
+ if (RFV.occupiesMultipleRegs()) {
unsigned Offset = 0;
- auto RegisterVT = RFV.RegVTs.begin();
- for (auto RegCount : RFV.RegCount) {
- unsigned RegisterSize = (RegisterVT++)->getSizeInBits();
- for (unsigned E = I + RegCount; I != E; ++I) {
- // The vregs are guaranteed to be allocated in sequence.
- Op = MachineOperand::CreateReg(VMI->second + I, false);
- auto FragmentExpr = DIExpression::createFragmentExpression(
- Expr, Offset, RegisterSize);
- if (!FragmentExpr)
- continue;
- FuncInfo.ArgDbgValues.push_back(
- BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare,
- Op->getReg(), Variable, *FragmentExpr));
- Offset += RegisterSize;
- }
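+ // Emit one DBG_VALUE per register, each covering the next fragment of
+ // the variable.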
+ for (auto RegAndSize : RFV.getRegsAndSizes()) {
+ Op = MachineOperand::CreateReg(RegAndSize.first, false);
+ auto FragmentExpr = DIExpression::createFragmentExpression(
+ Expr, Offset, RegAndSize.second);
+ if (!FragmentExpr)
+ continue;
+ FuncInfo.ArgDbgValues.push_back(
+ BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare,
+ Op->getReg(), Variable, *FragmentExpr));
+ Offset += RegAndSize.second;
}
return true;
}
@@ -4901,17 +4955,10 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
assert(Variable->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
- if (Op->isReg())
- FuncInfo.ArgDbgValues.push_back(
- BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
- Op->getReg(), Variable, Expr));
- else
- FuncInfo.ArgDbgValues.push_back(
- BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE))
- .add(*Op)
- .addImm(0)
- .addMetadata(Variable)
- .addMetadata(Expr));
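+ // Non-register locations (e.g. frame indices) are always emitted as
+ // indirect DBG_VALUEs.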
+ IsIndirect = (Op->isReg()) ? IsIndirect : true;
+ FuncInfo.ArgDbgValues.push_back(
+ BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
+ *Op, Variable, Expr));
return true;
}
@@ -4924,13 +4971,20 @@ SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
unsigned DbgSDNodeOrder) {
if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
// Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe
- // stack slot locations as such instead of as indirectly addressed
- // locations.
- return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), dl,
- DbgSDNodeOrder);
+ // stack slot locations.
+ //
+ // Consider "int x = 0; int *px = &x;". There are two kinds of interesting
+ // debug values here after optimization:
+ //
+ // dbg.value(i32* %px, !"int *px", !DIExpression()), and
+ // dbg.value(i32* %px, !"int x", !DIExpression(DW_OP_deref))
+ //
+ // Both describe the direct values of their associated variables.
+ return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(),
+ /*IsIndirect*/ false, dl, DbgSDNodeOrder);
}
- return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false, dl,
- DbgSDNodeOrder);
+ return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(),
+ /*IsIndirect*/ false, dl, DbgSDNodeOrder);
}
// VisualStudio defines setjmp as _setjmp
@@ -5000,14 +5054,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::longjmp:
return &"_longjmp"[!TLI.usesUnderscoreLongJmp()];
case Intrinsic::memcpy: {
+ const auto &MCI = cast<MemCpyInst>(I);
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
- unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
- if (!Align)
- Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment.
- bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
+ // @llvm.memcpy defines 0 and 1 to both mean no alignment.
+ unsigned DstAlign = std::max<unsigned>(MCI.getDestAlignment(), 1);
+ unsigned SrcAlign = std::max<unsigned>(MCI.getSourceAlignment(), 1);
+ unsigned Align = MinAlign(DstAlign, SrcAlign);
+ bool isVol = MCI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ // FIXME: Support passing different dest/src alignments to the memcpy DAG
+ // node.
SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
false, isTC,
MachinePointerInfo(I.getArgOperand(0)),
@@ -5016,13 +5074,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
case Intrinsic::memset: {
+ const auto &MSI = cast<MemSetInst>(I);
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
- unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
- if (!Align)
- Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment.
- bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
+ // @llvm.memset defines 0 and 1 to both mean no alignment.
+ unsigned Align = std::max<unsigned>(MSI.getDestAlignment(), 1);
+ bool isVol = MSI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
isTC, MachinePointerInfo(I.getArgOperand(0)));
@@ -5030,14 +5088,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
case Intrinsic::memmove: {
+ const auto &MMI = cast<MemMoveInst>(I);
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
- unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
- if (!Align)
- Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment.
- bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
+ // @llvm.memmove defines 0 and 1 to both mean no alignment.
+ unsigned DstAlign = std::max<unsigned>(MMI.getDestAlignment(), 1);
+ unsigned SrcAlign = std::max<unsigned>(MMI.getSourceAlignment(), 1);
+ unsigned Align = MinAlign(DstAlign, SrcAlign);
+ bool isVol = MMI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ // FIXME: Support passing different dest/src alignments to the memmove DAG
+ // node.
SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
isTC, MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)));
@@ -5050,36 +5112,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Src = getValue(MI.getRawSource());
SDValue Length = getValue(MI.getLength());
- // Emit a library call.
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
- Entry.Node = Dst;
- Args.push_back(Entry);
-
- Entry.Node = Src;
- Args.push_back(Entry);
-
- Entry.Ty = MI.getLength()->getType();
- Entry.Node = Length;
- Args.push_back(Entry);
-
- uint64_t ElementSizeConstant = MI.getElementSizeInBytes();
- RTLIB::Libcall LibraryCall =
- RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant);
- if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
- report_fatal_error("Unsupported element size");
-
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
- TLI.getLibcallCallingConv(LibraryCall),
- Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall),
- TLI.getPointerTy(DAG.getDataLayout())),
- std::move(Args));
-
- std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
- DAG.setRoot(CallResult.second);
+ unsigned DstAlign = MI.getDestAlignment();
+ unsigned SrcAlign = MI.getSourceAlignment();
+ Type *LengthTy = MI.getLength()->getType();
+ unsigned ElemSz = MI.getElementSizeInBytes();
+ bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ SDValue MC = DAG.getAtomicMemcpy(getRoot(), sdl, Dst, DstAlign, Src,
+ SrcAlign, Length, LengthTy, ElemSz, isTC,
+ MachinePointerInfo(MI.getRawDest()),
+ MachinePointerInfo(MI.getRawSource()));
+ updateDAGForMaybeTailCall(MC);
return nullptr;
}
case Intrinsic::memmove_element_unordered_atomic: {
@@ -5088,36 +5130,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Src = getValue(MI.getRawSource());
SDValue Length = getValue(MI.getLength());
- // Emit a library call.
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
- Entry.Node = Dst;
- Args.push_back(Entry);
-
- Entry.Node = Src;
- Args.push_back(Entry);
-
- Entry.Ty = MI.getLength()->getType();
- Entry.Node = Length;
- Args.push_back(Entry);
-
- uint64_t ElementSizeConstant = MI.getElementSizeInBytes();
- RTLIB::Libcall LibraryCall =
- RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant);
- if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
- report_fatal_error("Unsupported element size");
-
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
- TLI.getLibcallCallingConv(LibraryCall),
- Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall),
- TLI.getPointerTy(DAG.getDataLayout())),
- std::move(Args));
-
- std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
- DAG.setRoot(CallResult.second);
+ unsigned DstAlign = MI.getDestAlignment();
+ unsigned SrcAlign = MI.getSourceAlignment();
+ Type *LengthTy = MI.getLength()->getType();
+ unsigned ElemSz = MI.getElementSizeInBytes();
+ bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ SDValue MC = DAG.getAtomicMemmove(getRoot(), sdl, Dst, DstAlign, Src,
+ SrcAlign, Length, LengthTy, ElemSz, isTC,
+ MachinePointerInfo(MI.getRawDest()),
+ MachinePointerInfo(MI.getRawSource()));
+ updateDAGForMaybeTailCall(MC);
return nullptr;
}
case Intrinsic::memset_element_unordered_atomic: {
@@ -5126,37 +5148,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Val = getValue(MI.getValue());
SDValue Length = getValue(MI.getLength());
- // Emit a library call.
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
- Entry.Node = Dst;
- Args.push_back(Entry);
-
- Entry.Ty = Type::getInt8Ty(*DAG.getContext());
- Entry.Node = Val;
- Args.push_back(Entry);
-
- Entry.Ty = MI.getLength()->getType();
- Entry.Node = Length;
- Args.push_back(Entry);
-
- uint64_t ElementSizeConstant = MI.getElementSizeInBytes();
- RTLIB::Libcall LibraryCall =
- RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant);
- if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
- report_fatal_error("Unsupported element size");
-
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
- TLI.getLibcallCallingConv(LibraryCall),
- Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall),
- TLI.getPointerTy(DAG.getDataLayout())),
- std::move(Args));
-
- std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
- DAG.setRoot(CallResult.second);
+ unsigned DstAlign = MI.getDestAlignment();
+ Type *LengthTy = MI.getLength()->getType();
+ unsigned ElemSz = MI.getElementSizeInBytes();
+ bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ SDValue MC = DAG.getAtomicMemset(getRoot(), sdl, Dst, DstAlign, Val, Length,
+ LengthTy, ElemSz, isTC,
+ MachinePointerInfo(MI.getRawDest()));
+ updateDAGForMaybeTailCall(MC);
return nullptr;
}
case Intrinsic::dbg_addr:
@@ -5164,13 +5163,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
const DbgInfoIntrinsic &DI = cast<DbgInfoIntrinsic>(I);
DILocalVariable *Variable = DI.getVariable();
DIExpression *Expression = DI.getExpression();
+ dropDanglingDebugInfo(Variable, Expression);
assert(Variable && "Missing variable");
// Check if address has undef value.
const Value *Address = DI.getVariableLocation();
if (!Address || isa<UndefValue>(Address) ||
(Address->use_empty() && !isa<Argument>(Address))) {
- DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
return nullptr;
}
@@ -5195,10 +5195,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// DBG_VALUE instructions. llvm.dbg.declare is handled as a frame index in
// the MachineFunction variable table.
if (FI != std::numeric_limits<int>::max()) {
- if (Intrinsic == Intrinsic::dbg_addr)
- DAG.AddDbgValue(DAG.getFrameIndexDbgValue(Variable, Expression, FI, dl,
- SDNodeOrder),
- getRoot().getNode(), isParameter);
+ if (Intrinsic == Intrinsic::dbg_addr) {
+ SDDbgValue *SDV = DAG.getFrameIndexDbgValue(
+ Variable, Expression, FI, /*IsIndirect*/ true, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, getRoot().getNode(), isParameter);
+ }
return nullptr;
}
@@ -5214,8 +5215,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
auto FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
if (isParameter && FINode) {
// Byval parameter. We have a frame index at this point.
- SDV = DAG.getFrameIndexDbgValue(Variable, Expression,
- FINode->getIndex(), dl, SDNodeOrder);
+ SDV =
+ DAG.getFrameIndexDbgValue(Variable, Expression, FINode->getIndex(),
+ /*IsIndirect*/ true, dl, SDNodeOrder);
} else if (isa<Argument>(Address)) {
// Address is an argument, so try to emit its dbg value using
// virtual register info from the FuncInfo.ValueMap.
@@ -5231,17 +5233,28 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// virtual register info from the FuncInfo.ValueMap.
if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true,
N)) {
- DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
}
}
return nullptr;
}
+ case Intrinsic::dbg_label: {
+ const DbgLabelInst &DI = cast<DbgLabelInst>(I);
+ DILabel *Label = DI.getLabel();
+ assert(Label && "Missing label");
+
+ SDDbgLabel *SDV;
+ SDV = DAG.getDbgLabel(Label, dl, SDNodeOrder);
+ DAG.AddDbgLabel(SDV);
+ return nullptr;
+ }
case Intrinsic::dbg_value: {
const DbgValueInst &DI = cast<DbgValueInst>(I);
assert(DI.getVariable() && "Missing variable");
DILocalVariable *Variable = DI.getVariable();
DIExpression *Expression = DI.getExpression();
+ dropDanglingDebugInfo(Variable, Expression);
const Value *V = DI.getValue();
if (!V)
return nullptr;
@@ -5266,16 +5279,64 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
+ // PHI nodes have already been selected, so we should already know which
+ // VReg the value has been assigned to.
+ if (isa<PHINode>(V)) {
+ auto VMI = FuncInfo.ValueMap.find(V);
+ if (VMI != FuncInfo.ValueMap.end()) {
+ unsigned Reg = VMI->second;
+ // The PHI node may be split up into several MI PHI nodes (in
+ // FunctionLoweringInfo::set).
+ RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
+ V->getType(), false);
+ if (RFV.occupiesMultipleRegs()) {
+ unsigned Offset = 0;
+ unsigned BitsToDescribe = 0;
+ if (auto VarSize = Variable->getSizeInBits())
+ BitsToDescribe = *VarSize;
+ if (auto Fragment = Expression->getFragmentInfo())
+ BitsToDescribe = Fragment->SizeInBits;
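+ // Clamp the final fragment so we never describe more bits than the
+ // variable (or the fragment given by the expression) actually has.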
+ for (auto RegAndSize : RFV.getRegsAndSizes()) {
+ unsigned RegisterSize = RegAndSize.second;
+ // Bail out if all bits are described already.
+ if (Offset >= BitsToDescribe)
+ break;
+ unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe)
+ ? BitsToDescribe - Offset
+ : RegisterSize;
+ auto FragmentExpr = DIExpression::createFragmentExpression(
+ Expression, Offset, FragmentSize);
+ if (!FragmentExpr)
+ continue;
+ SDV = DAG.getVRegDbgValue(Variable, *FragmentExpr, RegAndSize.first,
+ false, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, nullptr, false);
+ Offset += RegisterSize;
+ }
+ } else {
+ SDV = DAG.getVRegDbgValue(Variable, Expression, Reg, false, dl,
+ SDNodeOrder);
+ DAG.AddDbgValue(SDV, nullptr, false);
+ }
+ return nullptr;
+ }
+ }
+
+ // TODO: When we get here we will either drop the dbg.value completely, or
+ // try to move it forward by letting it dangle for a while. So we should
+ // probably add an extra DbgValue to the DAG here, with a reference to
+ // "noreg", to indicate that we have lost the debug location for the
+ // variable.
+
if (!V->use_empty() ) {
// Do not call getValue(V) yet, as we don't want to generate code.
// Remember it for later.
- DanglingDebugInfo DDI(&DI, dl, SDNodeOrder);
- DanglingDebugInfoMap[V] = DDI;
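+ // Several dbg.value intrinsics may refer to the same value, so keep a
+ // list of dangling entries per value.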
+ DanglingDebugInfoMap[V].emplace_back(&DI, dl, SDNodeOrder);
return nullptr;
}
- DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n");
- DEBUG(dbgs() << " Last seen at:\n " << *V << "\n");
+ LLVM_DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n");
+ LLVM_DEBUG(dbgs() << " Last seen at:\n " << *V << "\n");
return nullptr;
}
@@ -5609,6 +5670,52 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg));
return nullptr;
}
+ case Intrinsic::fshl:
+ case Intrinsic::fshr: {
+ bool IsFSHL = Intrinsic == Intrinsic::fshl;
+ SDValue X = getValue(I.getArgOperand(0));
+ SDValue Y = getValue(I.getArgOperand(1));
+ SDValue Z = getValue(I.getArgOperand(2));
+ EVT VT = X.getValueType();
+
+ // When X == Y, this is rotate. Create the node directly if legal.
+ // TODO: This should also be done if the operation is custom, but we have
+ // to make sure targets are handling the modulo shift amount as expected.
+ // TODO: If the rotate direction (left or right) corresponding to the shift
+ // is not available, adjust the shift value and invert the direction.
+ auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR;
+ if (X == Y && TLI.isOperationLegal(RotateOpcode, VT)) {
+ setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z));
+ return nullptr;
+ }
+
+ // Get the shift amount and inverse shift amount, modulo the bit-width.
+ SDValue BitWidthC = DAG.getConstant(VT.getScalarSizeInBits(), sdl, VT);
+ SDValue ShAmt = DAG.getNode(ISD::UREM, sdl, VT, Z, BitWidthC);
+ SDValue NegZ = DAG.getNode(ISD::SUB, sdl, VT, BitWidthC, Z);
+ SDValue InvShAmt = DAG.getNode(ISD::UREM, sdl, VT, NegZ, BitWidthC);
+
+ // fshl: (X << (Z % BW)) | (Y >> ((BW - Z) % BW))
+ // fshr: (X << ((BW - Z) % BW)) | (Y >> (Z % BW))
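+ // For example, with i8 operands and Z == 3:
+ // fshl(X, Y, 3) == (X << 3) | (Y >> 5) and fshr(X, Y, 3) == (X << 5) | (Y >> 3).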
+ SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt : InvShAmt);
+ SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, Y, IsFSHL ? InvShAmt : ShAmt);
+ SDValue Res = DAG.getNode(ISD::OR, sdl, VT, ShX, ShY);
+
+ // If (Z % BW == 0), then (BW - Z) % BW is also zero, so the result would
+ // be X | Y. If X == Y (rotate), that's fine. If not, we have to select.
+ if (X != Y) {
+ SDValue Zero = DAG.getConstant(0, sdl, VT);
+ EVT CCVT = MVT::i1;
+ if (VT.isVector())
+ CCVT = EVT::getVectorVT(*Context, CCVT, VT.getVectorNumElements());
+ // For fshl, 0 shift returns the 1st arg (X).
+ // For fshr, 0 shift returns the 2nd arg (Y).
+ SDValue IsZeroShift = DAG.getSetCC(sdl, CCVT, ShAmt, Zero, ISD::SETEQ);
+ Res = DAG.getSelect(sdl, VT, IsZeroShift, IsFSHL ? X : Y, Res);
+ }
+ setValue(&I, Res);
+ return nullptr;
+ }
case Intrinsic::stacksave: {
SDValue Op = getRoot();
Res = DAG.getNode(
@@ -5703,7 +5810,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::annotation:
case Intrinsic::ptr_annotation:
- case Intrinsic::invariant_group_barrier:
+ case Intrinsic::launder_invariant_group:
+ case Intrinsic::strip_invariant_group:
// Drop the intrinsic, but forward the value
setValue(&I, getValue(I.getOperand(0)));
return nullptr;
@@ -5822,17 +5930,23 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Ops[5];
unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
auto Flags = rw == 0 ? MachineMemOperand::MOLoad :MachineMemOperand::MOStore;
- Ops[0] = getRoot();
+ Ops[0] = DAG.getRoot();
Ops[1] = getValue(I.getArgOperand(0));
Ops[2] = getValue(I.getArgOperand(1));
Ops[3] = getValue(I.getArgOperand(2));
Ops[4] = getValue(I.getArgOperand(3));
- DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl,
- DAG.getVTList(MVT::Other), Ops,
- EVT::getIntegerVT(*Context, 8),
- MachinePointerInfo(I.getArgOperand(0)),
- 0, /* align */
- Flags));
+ SDValue Result = DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl,
+ DAG.getVTList(MVT::Other), Ops,
+ EVT::getIntegerVT(*Context, 8),
+ MachinePointerInfo(I.getArgOperand(0)),
+ 0, /* align */
+ Flags);
+
+ // Chain the prefetch in parallel with any pending loads, to stay out of
+ // the way of later optimizations.
+ PendingLoads.push_back(Result);
+ Result = getRoot();
+ DAG.setRoot(Result);
return nullptr;
}
case Intrinsic::lifetime_start:
@@ -6004,6 +6118,41 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
setValue(&I, patchableNode);
return nullptr;
}
+ case Intrinsic::xray_typedevent: {
+ // Here we want to make sure that the intrinsic behaves as if it has a
+ // specific calling convention, and only for x86_64.
+ // FIXME: Support other platforms later.
+ const auto &Triple = DAG.getTarget().getTargetTriple();
+ if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
+ return nullptr;
+
+ SDLoc DL = getCurSDLoc();
+ SmallVector<SDValue, 8> Ops;
+
+ // We want to say that we always want the arguments in registers.
+ // It's unclear to me how manipulating the selection DAG here forces callers
+ // to provide arguments in registers instead of on the stack.
+ SDValue LogTypeId = getValue(I.getArgOperand(0));
+ SDValue LogEntryVal = getValue(I.getArgOperand(1));
+ SDValue StrSizeVal = getValue(I.getArgOperand(2));
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Chain = getRoot();
+ Ops.push_back(LogTypeId);
+ Ops.push_back(LogEntryVal);
+ Ops.push_back(StrSizeVal);
+ Ops.push_back(Chain);
+
+ // We need to enforce the calling convention for the callsite, so that
+ // argument ordering is enforced correctly, and so that register allocation
+ // can see which registers may be assumed clobbered and preserve live values
+ // across calls to the intrinsic.
+ MachineSDNode *MN = DAG.getMachineNode(
+ TargetOpcode::PATCHABLE_TYPED_EVENT_CALL, DL, NodeTys, Ops);
+ SDValue patchableNode = SDValue(MN, 0);
+ DAG.setRoot(patchableNode);
+ setValue(&I, patchableNode);
+ return nullptr;
+ }
case Intrinsic::experimental_deoptimize:
LowerDeoptimizeCall(&I);
return nullptr;
@@ -6023,6 +6172,66 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::experimental_vector_reduce_fmin:
visitVectorReduce(I, Intrinsic);
return nullptr;
+
+ case Intrinsic::icall_branch_funnel: {
+ SmallVector<SDValue, 16> Ops;
+ Ops.push_back(DAG.getRoot());
+ Ops.push_back(getValue(I.getArgOperand(0)));
+
+ int64_t Offset;
+ auto *Base = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
+ I.getArgOperand(1), Offset, DAG.getDataLayout()));
+ if (!Base)
+ report_fatal_error(
+ "llvm.icall.branch.funnel operand must be a GlobalValue");
+ Ops.push_back(DAG.getTargetGlobalAddress(Base, getCurSDLoc(), MVT::i64, 0));
+
+ struct BranchFunnelTarget {
+ int64_t Offset;
+ SDValue Target;
+ };
+ SmallVector<BranchFunnelTarget, 8> Targets;
+
+ for (unsigned Op = 1, N = I.getNumArgOperands(); Op != N; Op += 2) {
+ auto *ElemBase = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
+ I.getArgOperand(Op), Offset, DAG.getDataLayout()));
+ if (ElemBase != Base)
+ report_fatal_error("all llvm.icall.branch.funnel operands must refer "
+ "to the same GlobalValue");
+
+ SDValue Val = getValue(I.getArgOperand(Op + 1));
+ auto *GA = dyn_cast<GlobalAddressSDNode>(Val);
+ if (!GA)
+ report_fatal_error(
+ "llvm.icall.branch.funnel operand must be a GlobalValue");
+ Targets.push_back({Offset, DAG.getTargetGlobalAddress(
+ GA->getGlobal(), getCurSDLoc(),
+ Val.getValueType(), GA->getOffset())});
+ }
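+ // Sort by offset so that the targets are emitted in increasing offset
+ // order.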
+ llvm::sort(Targets.begin(), Targets.end(),
+ [](const BranchFunnelTarget &T1, const BranchFunnelTarget &T2) {
+ return T1.Offset < T2.Offset;
+ });
+
+ for (auto &T : Targets) {
+ Ops.push_back(DAG.getTargetConstant(T.Offset, getCurSDLoc(), MVT::i32));
+ Ops.push_back(T.Target);
+ }
+
+ SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL,
+ getCurSDLoc(), MVT::Other, Ops),
+ 0);
+ DAG.setRoot(N);
+ setValue(&I, N);
+ HasTailCall = true;
+ return nullptr;
+ }
+
+ case Intrinsic::wasm_landingpad_index: {
+ // TODO: Store the landing pad index in a map, to be used when generating
+ // LSDA information.
+ return nullptr;
+ }
}
}
@@ -6172,7 +6381,10 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel));
// Inform MachineModuleInfo of range.
- if (MF.hasEHFunclets()) {
+ auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
+ // There is a platform (e.g. wasm) that uses funclet style IR but does not
+ // actually use outlined funclets and their LSDA info style.
+ if (MF.hasEHFunclets() && isFuncletEHPersonality(Pers)) {
assert(CLI.CS);
WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo();
EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS.getInstruction()),
@@ -6630,14 +6842,13 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
const char *RenameFn = nullptr;
if (Function *F = I.getCalledFunction()) {
if (F->isDeclaration()) {
- if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) {
- if (unsigned IID = II->getIntrinsicID(F)) {
- RenameFn = visitIntrinsicCall(I, IID);
- if (!RenameFn)
- return;
- }
- }
- if (Intrinsic::ID IID = F->getIntrinsicID()) {
+ // Is this an LLVM intrinsic or a target-specific intrinsic?
+ unsigned IID = F->getIntrinsicID();
+ if (!IID)
+ if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo())
+ IID = II->getIntrinsicID(F);
+
+ if (IID) {
RenameFn = visitIntrinsicCall(I, IID);
if (!RenameFn)
return;
@@ -6989,27 +7200,37 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
unsigned NumRegs = 1;
if (OpInfo.ConstraintVT != MVT::Other) {
- // If this is a FP input in an integer register (or visa versa) insert a bit
- // cast of the input value. More generally, handle any case where the input
- // value disagrees with the register class we plan to stick this in.
- if (OpInfo.Type == InlineAsm::isInput && PhysReg.second &&
+ // If this is a FP operand in an integer register (or vice versa), or more
+ // generally if the operand value disagrees with the register class we plan
+ // to stick it in, fix the operand type.
+ //
+ // If this is an input value, the bitcast to the new type is done now.
+ // The bitcast for an output value is done at the end of visitInlineAsm().
+ if ((OpInfo.Type == InlineAsm::isOutput ||
+ OpInfo.Type == InlineAsm::isInput) &&
+ PhysReg.second &&
!TRI.isTypeLegalForClass(*PhysReg.second, OpInfo.ConstraintVT)) {
// Try to convert to the first EVT that the reg class contains. If the
// types are identical size, use a bitcast to convert (e.g. two differing
- // vector types).
+ // vector types). Note: output bitcast is done at the end of
+ // visitInlineAsm().
MVT RegVT = *TRI.legalclasstypes_begin(*PhysReg.second);
- if (RegVT.getSizeInBits() == OpInfo.CallOperand.getValueSizeInBits()) {
- OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
- RegVT, OpInfo.CallOperand);
+ if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
+ // Exclude indirect inputs while they are unsupported because the code
+ // to perform the load is missing and thus OpInfo.CallOperand still
+ // refers to the input address rather than the pointed-to value.
+ if (OpInfo.Type == InlineAsm::isInput && !OpInfo.isIndirect)
+ OpInfo.CallOperand =
+ DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand);
OpInfo.ConstraintVT = RegVT;
+ // If the operand is a FP value and we want it in integer registers,
+ // use the corresponding integer type. This turns an f64 value into
+ // i64, which can be passed with two i32 values on a 32-bit machine.
} else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
- // If the input is a FP value and we want it in FP registers, do a
- // bitcast to the corresponding integer type. This turns an f64 value
- // into i64, which can be passed with two i32 values on a 32-bit
- // machine.
RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits());
- OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
- RegVT, OpInfo.CallOperand);
+ if (OpInfo.Type == InlineAsm::isInput)
+ OpInfo.CallOperand =
+ DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand);
OpInfo.ConstraintVT = RegVT;
}
}
@@ -7246,7 +7467,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
continue;
// If this is a memory input, and if the operand is not indirect, do what we
- // need to to provide an address for the memory input.
+ // need to provide an address for the memory input.
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
!OpInfo.isIndirect) {
assert((OpInfo.isMultipleAlternative ||
@@ -7521,12 +7742,18 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
EVT ResultType = TLI.getValueType(DAG.getDataLayout(), CS.getType());
- // If any of the results of the inline asm is a vector, it may have the
- // wrong width/num elts. This can happen for register classes that can
- // contain multiple different value types. The preg or vreg allocated may
- // not have the same VT as was expected. Convert it to the right type
- // with bit_convert.
- if (ResultType != Val.getValueType() && Val.getValueType().isVector()) {
+ // If the type of the inline asm call site return value is different from,
+ // but has the same size as, the type of the asm output, bitcast it. One
+ // example of this is vectors with different widths / numbers of elements.
+ // This can happen for register classes that can contain multiple
+ // different value types. The preg or vreg allocated may not have the
+ // same VT as was expected.
+ //
+ // This can also happen for a return value that disagrees with the
+ // register class it is put in, eg. a double in a general-purpose
+ // register on a 32-bit machine.
+ if (ResultType != Val.getValueType() &&
+ ResultType.getSizeInBits() == Val.getValueSizeInBits()) {
Val = DAG.getNode(ISD::BITCAST, getCurSDLoc(),
ResultType, Val);
@@ -7581,8 +7808,17 @@ void SelectionDAGBuilder::emitInlineAsmError(ImmutableCallSite CS,
// Make sure we leave the DAG in a valid state
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- auto VT = TLI.getValueType(DAG.getDataLayout(), CS.getType());
- setValue(CS.getInstruction(), DAG.getUNDEF(VT));
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), CS->getType(), ValueVTs);
+
+ if (ValueVTs.empty())
+ return;
+
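+ // Emit one undef per constituent value type so that aggregate return
+ // types also get a well-formed merged value.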
+ SmallVector<SDValue, 1> Ops;
+ for (unsigned i = 0, e = ValueVTs.size(); i != e; ++i)
+ Ops.push_back(DAG.getUNDEF(ValueVTs[i]));
+
+ setValue(CS.getInstruction(), DAG.getMergeValues(Ops, getCurSDLoc()));
}
void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
@@ -7656,7 +7892,7 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
return DAG.getMergeValues(Ops, SL);
}
-/// \brief Populate a CallLowerinInfo (into \p CLI) based on the properties of
+/// Populate a CallLoweringInfo (into \p CLI) based on the properties of
/// the call being lowered.
///
/// This is a helper for lowering intrinsics that follow a target calling
@@ -7680,7 +7916,7 @@ void SelectionDAGBuilder::populateCallLoweringInfo(
TargetLowering::ArgListEntry Entry;
Entry.Node = getValue(V);
Entry.Ty = V->getType();
- Entry.setAttributes(&CS, ArgIdx);
+ Entry.setAttributes(&CS, ArgI);
Args.push_back(Entry);
}
@@ -7691,7 +7927,7 @@ void SelectionDAGBuilder::populateCallLoweringInfo(
.setIsPatchPoint(IsPatchPoint);
}
-/// \brief Add a stack map intrinsic call's live variable operands to a stackmap
+/// Add a stack map intrinsic call's live variable operands to a stackmap
/// or patchpoint target node's operand list.
///
/// Constants are converted to TargetConstants purely as an optimization to
@@ -7727,7 +7963,7 @@ static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx,
}
}
-/// \brief Lower llvm.experimental.stackmap directly to its target opcode.
+/// Lower llvm.experimental.stackmap directly to its target opcode.
void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
// void @llvm.experimental.stackmap(i32 <id>, i32 <numShadowBytes>,
// [live variables...])
@@ -7790,7 +8026,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
FuncInfo.MF->getFrameInfo().setHasStackMap();
}
-/// \brief Lower llvm.experimental.patchpoint directly to its target opcode.
+/// Lower llvm.experimental.patchpoint directly to its target opcode.
void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
const BasicBlock *EHPadBB) {
// void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
@@ -7954,8 +8190,6 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
FastMathFlags FMF;
if (isa<FPMathOperator>(I))
FMF = I.getFastMathFlags();
- SDNodeFlags SDFlags;
- SDFlags.setNoNaNs(FMF.noNaNs());
switch (Intrinsic) {
case Intrinsic::experimental_vector_reduce_fadd:
@@ -7998,10 +8232,10 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1);
break;
case Intrinsic::experimental_vector_reduce_fmax:
- Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1, SDFlags);
+ Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1);
break;
case Intrinsic::experimental_vector_reduce_fmin:
- Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags);
+ Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1);
break;
default:
llvm_unreachable("Unhandled vector reduce intrinsic");
@@ -8220,8 +8454,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
else if (Args[i].IsZExt)
ExtendKind = ISD::ZERO_EXTEND;
- // Conservatively only handle 'returned' on non-vectors for now
- if (Args[i].IsReturned && !Op.getValueType().isVector()) {
+ // Conservatively only handle 'returned' on non-vectors that can be lowered,
+ // for now.
+ if (Args[i].IsReturned && !Op.getValueType().isVector() &&
+ CanLowerReturn) {
assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues &&
"unexpected use of 'returned'");
// Before passing 'returned' to the target lowering code, ensure that
@@ -8500,7 +8736,8 @@ findArgumentCopyElisionCandidates(const DataLayout &DL,
continue;
}
- DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI << '\n');
+ LLVM_DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI
+ << '\n');
// Mark this alloca and store for argument copy elision.
*Info = StaticAllocaInfo::Elidable;
@@ -8541,8 +8778,9 @@ static void tryToElideArgumentCopy(
int OldIndex = AllocaIndex;
MachineFrameInfo &MFI = FuncInfo->MF->getFrameInfo();
if (MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)) {
- DEBUG(dbgs() << " argument copy elision failed due to bad fixed stack "
- "object size\n");
+ LLVM_DEBUG(
+ dbgs() << " argument copy elision failed due to bad fixed stack "
+ "object size\n");
return;
}
unsigned RequiredAlignment = AI->getAlignment();
@@ -8551,16 +8789,16 @@ static void tryToElideArgumentCopy(
AI->getAllocatedType());
}
if (MFI.getObjectAlignment(FixedIndex) < RequiredAlignment) {
- DEBUG(dbgs() << " argument copy elision failed: alignment of alloca "
- "greater than stack argument alignment ("
- << RequiredAlignment << " vs "
- << MFI.getObjectAlignment(FixedIndex) << ")\n");
+ LLVM_DEBUG(dbgs() << " argument copy elision failed: alignment of alloca "
+ "greater than stack argument alignment ("
+ << RequiredAlignment << " vs "
+ << MFI.getObjectAlignment(FixedIndex) << ")\n");
return;
}
// Perform the elision. Delete the old stack object and replace its only use
// in the variable info map. Mark the stack object as mutable.
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "Eliding argument copy from " << Arg << " to " << *AI << '\n'
<< " Replacing frame index " << OldIndex << " with " << FixedIndex
<< '\n';
@@ -8732,14 +8970,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
"LowerFormalArguments didn't return a valid chain!");
assert(InVals.size() == Ins.size() &&
"LowerFormalArguments didn't emit the correct number of values!");
- DEBUG({
- for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
- assert(InVals[i].getNode() &&
- "LowerFormalArguments emitted a null value!");
- assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
- "LowerFormalArguments emitted a value with the wrong type!");
- }
- });
+ LLVM_DEBUG({
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ assert(InVals[i].getNode() &&
+ "LowerFormalArguments emitted a null value!");
+ assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
+ "LowerFormalArguments emitted a value with the wrong type!");
+ }
+ });
// Update the DAG with the new chain value resulting from argument lowering.
DAG.setRoot(NewRoot);
@@ -9351,7 +9589,7 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,
}
BitTestInfo BTI;
- std::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) {
+ llvm::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) {
// Sort by probability first, number of bits second, bit mask third.
if (a.ExtraProb != b.ExtraProb)
return a.ExtraProb > b.ExtraProb;
@@ -9550,15 +9788,15 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
// checked first. However, two clusters can have the same probability in
// which case their relative ordering is non-deterministic. So we use Low
// as a tie-breaker as clusters are guaranteed to never overlap.
- std::sort(W.FirstCluster, W.LastCluster + 1,
- [](const CaseCluster &a, const CaseCluster &b) {
+ llvm::sort(W.FirstCluster, W.LastCluster + 1,
+ [](const CaseCluster &a, const CaseCluster &b) {
return a.Prob != b.Prob ?
a.Prob > b.Prob :
a.Low->getValue().slt(b.Low->getValue());
});
// Rearrange the case blocks so that the last one falls through if possible
- // without without changing the order of probabilities.
+ // without changing the order of probabilities.
for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) {
--I;
if (I->Prob > W.LastCluster->Prob)
@@ -9883,8 +10121,8 @@ MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster(
if (!SwitchPeeled)
return SwitchMBB;
- DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: " << TopCaseProb
- << "\n");
+ LLVM_DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: "
+ << TopCaseProb << "\n");
// Record the MBB for the peeled switch statement.
MachineFunction::iterator BBI(SwitchMBB);
@@ -9901,10 +10139,11 @@ MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster(
Clusters.erase(PeeledCaseIt);
for (CaseCluster &CC : Clusters) {
- DEBUG(dbgs() << "Scale the probablity for one cluster, before scaling: "
- << CC.Prob << "\n");
+ LLVM_DEBUG(
+ dbgs() << "Scale the probablity for one cluster, before scaling: "
+ << CC.Prob << "\n");
CC.Prob = scaleCaseProbality(CC.Prob, TopCaseProb);
- DEBUG(dbgs() << "After scaling: " << CC.Prob << "\n");
+ LLVM_DEBUG(dbgs() << "After scaling: " << CC.Prob << "\n");
}
PeeledCaseProb = TopCaseProb;
return PeeledSwitchMBB;
@@ -9983,11 +10222,13 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
findJumpTables(Clusters, &SI, DefaultMBB);
findBitTestClusters(Clusters, &SI);
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "Case clusters: ";
for (const CaseCluster &C : Clusters) {
- if (C.Kind == CC_JumpTable) dbgs() << "JT:";
- if (C.Kind == CC_BitTests) dbgs() << "BT:";
+ if (C.Kind == CC_JumpTable)
+ dbgs() << "JT:";
+ if (C.Kind == CC_BitTests)
+ dbgs() << "BT:";
C.Low->getValue().print(dbgs(), true);
if (C.Low != C.High) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 9e7c2bc6821b..e421984b8af2 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -21,7 +21,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -33,6 +32,7 @@
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachineValueType.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -116,9 +116,12 @@ class SelectionDAGBuilder {
unsigned getSDNodeOrder() { return SDNodeOrder; }
};
+ /// DanglingDebugInfoVector - Helper type for DanglingDebugInfoMap.
+ typedef std::vector<DanglingDebugInfo> DanglingDebugInfoVector;
+
/// DanglingDebugInfoMap - Keeps track of dbg_values for which we have not
/// yet seen the referent. We defer handling these until we do see it.
- DenseMap<const Value*, DanglingDebugInfo> DanglingDebugInfoMap;
+ DenseMap<const Value*, DanglingDebugInfoVector> DanglingDebugInfoMap;
public:
/// PendingLoads - Loads are not emitted to the program immediately. We bunch
@@ -671,6 +674,12 @@ public:
/// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise.
SDValue getCopyFromRegs(const Value *V, Type *Ty);
+ /// If we have dangling debug info that describes \p Variable, or an
+ /// overlapping part of the variable considering \p Expr, then this method
+ /// will drop that debug info as it isn't valid any longer.
+ void dropDanglingDebugInfo(const DILocalVariable *Variable,
+ const DIExpression *Expr);
+
// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
// generate the debug data structures now that we've seen its definition.
void resolveDanglingDebugInfo(const Value *V, SDValue Val);
@@ -678,6 +687,13 @@ public:
SDValue getValue(const Value *V);
bool findValue(const Value *V) const;
+ /// Return the SDNode for the specified IR value if it exists.
+ SDNode *getNodeForIRValue(const Value *V) {
+ if (NodeMap.find(V) == NodeMap.end())
+ return nullptr;
+ return NodeMap[V].getNode();
+ }
+
SDValue getNonRegisterValue(const Value *V);
SDValue getValueImpl(const Value *V);
@@ -696,13 +712,13 @@ public:
void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
- Instruction::BinaryOps Opc, BranchProbability TW,
- BranchProbability FW, bool InvertCond);
+ Instruction::BinaryOps Opc, BranchProbability TProb,
+ BranchProbability FProb, bool InvertCond);
void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
- BranchProbability TW, BranchProbability FW,
+ BranchProbability TProb, BranchProbability FProb,
bool InvertCond);
bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases);
bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB);
@@ -774,11 +790,11 @@ public:
};
/// Lower \p SLI into a STATEPOINT instruction.
- SDValue LowerAsSTATEPOINT(StatepointLoweringInfo &SLI);
+ SDValue LowerAsSTATEPOINT(StatepointLoweringInfo &SI);
// This function is responsible for the whole statepoint lowering process.
// It uniformly handles invoke and call statepoints.
- void LowerStatepoint(ImmutableStatepoint Statepoint,
+ void LowerStatepoint(ImmutableStatepoint ISP,
const BasicBlock *EHPadBB = nullptr);
void LowerCallSiteWithDeoptBundle(ImmutableCallSite CS, SDValue Callee,
@@ -838,7 +854,7 @@ private:
void visitInvoke(const InvokeInst &I);
void visitResume(const ResumeInst &I);
- void visitBinary(const User &I, unsigned OpCode);
+ void visitBinary(const User &I, unsigned Opcode);
void visitShift(const User &I, unsigned Opcode);
void visitAdd(const User &I) { visitBinary(I, ISD::ADD); }
void visitFAdd(const User &I) { visitBinary(I, ISD::FADD); }
@@ -881,7 +897,7 @@ private:
void visitExtractValue(const User &I);
void visitInsertValue(const User &I);
- void visitLandingPad(const LandingPadInst &I);
+ void visitLandingPad(const LandingPadInst &LP);
void visitGetElementPtr(const User &I);
void visitSelect(const User &I);
@@ -926,7 +942,7 @@ private:
const BasicBlock *EHPadBB = nullptr);
// These two are implemented in StatepointLowering.cpp
- void visitGCRelocate(const GCRelocateInst &I);
+ void visitGCRelocate(const GCRelocateInst &Relocate);
void visitGCResult(const GCResultInst &I);
void visitVectorReduce(const CallInst &I, unsigned Intrinsic);
@@ -1036,9 +1052,17 @@ struct RegsForValue {
/// Add this value to the specified inlineasm node operand list. This adds the
/// code marker, matching input operand index (if applicable), and includes
/// the number of values added into it.
- void AddInlineAsmOperands(unsigned Kind, bool HasMatching,
+ void AddInlineAsmOperands(unsigned Code, bool HasMatching,
unsigned MatchingIdx, const SDLoc &dl,
SelectionDAG &DAG, std::vector<SDValue> &Ops) const;
+
+ /// Check if the total RegCount is greater than one.
+ bool occupiesMultipleRegs() const {
+ return std::accumulate(RegCount.begin(), RegCount.end(), 0) > 1;
+ }
+
+ /// Return a list of registers and their sizes.
+ SmallVector<std::pair<unsigned, unsigned>, 4> getRegsAndSizes() const;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index dd30dc16378c..fa341e8b5fa5 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -20,7 +20,6 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -28,18 +27,21 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/Printable.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
@@ -85,6 +87,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd";
case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub";
case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd";
+ case ISD::ATOMIC_LOAD_CLR: return "AtomicLoadClr";
case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr";
case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor";
case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand";
@@ -176,20 +179,30 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FMAXNAN: return "fmaxnan";
case ISD::FNEG: return "fneg";
case ISD::FSQRT: return "fsqrt";
+ case ISD::STRICT_FSQRT: return "strict_fsqrt";
case ISD::FSIN: return "fsin";
+ case ISD::STRICT_FSIN: return "strict_fsin";
case ISD::FCOS: return "fcos";
+ case ISD::STRICT_FCOS: return "strict_fcos";
case ISD::FSINCOS: return "fsincos";
case ISD::FTRUNC: return "ftrunc";
case ISD::FFLOOR: return "ffloor";
case ISD::FCEIL: return "fceil";
case ISD::FRINT: return "frint";
+ case ISD::STRICT_FRINT: return "strict_frint";
case ISD::FNEARBYINT: return "fnearbyint";
+ case ISD::STRICT_FNEARBYINT: return "strict_fnearbyint";
case ISD::FROUND: return "fround";
case ISD::FEXP: return "fexp";
+ case ISD::STRICT_FEXP: return "strict_fexp";
case ISD::FEXP2: return "fexp2";
+ case ISD::STRICT_FEXP2: return "strict_fexp2";
case ISD::FLOG: return "flog";
+ case ISD::STRICT_FLOG: return "strict_flog";
case ISD::FLOG2: return "flog2";
+ case ISD::STRICT_FLOG2: return "strict_flog2";
case ISD::FLOG10: return "flog10";
+ case ISD::STRICT_FLOG10: return "strict_flog10";
// Binary operators
case ISD::ADD: return "add";
@@ -214,24 +227,31 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::ROTL: return "rotl";
case ISD::ROTR: return "rotr";
case ISD::FADD: return "fadd";
+ case ISD::STRICT_FADD: return "strict_fadd";
case ISD::FSUB: return "fsub";
+ case ISD::STRICT_FSUB: return "strict_fsub";
case ISD::FMUL: return "fmul";
+ case ISD::STRICT_FMUL: return "strict_fmul";
case ISD::FDIV: return "fdiv";
+ case ISD::STRICT_FDIV: return "strict_fdiv";
case ISD::FMA: return "fma";
+ case ISD::STRICT_FMA: return "strict_fma";
case ISD::FMAD: return "fmad";
case ISD::FREM: return "frem";
+ case ISD::STRICT_FREM: return "strict_frem";
case ISD::FCOPYSIGN: return "fcopysign";
case ISD::FGETSIGN: return "fgetsign";
case ISD::FCANONICALIZE: return "fcanonicalize";
case ISD::FPOW: return "fpow";
+ case ISD::STRICT_FPOW: return "strict_fpow";
case ISD::SMIN: return "smin";
case ISD::SMAX: return "smax";
case ISD::UMIN: return "umin";
case ISD::UMAX: return "umax";
case ISD::FPOWI: return "fpowi";
+ case ISD::STRICT_FPOWI: return "strict_fpowi";
case ISD::SETCC: return "setcc";
- case ISD::SETCCE: return "setcce";
case ISD::SETCCCARRY: return "setcccarry";
case ISD::SELECT: return "select";
case ISD::VSELECT: return "vselect";
@@ -366,7 +386,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::SETFALSE2: return "setfalse2";
}
case ISD::VECREDUCE_FADD: return "vecreduce_fadd";
+ case ISD::VECREDUCE_STRICT_FADD: return "vecreduce_strict_fadd";
case ISD::VECREDUCE_FMUL: return "vecreduce_fmul";
+ case ISD::VECREDUCE_STRICT_FMUL: return "vecreduce_strict_fmul";
case ISD::VECREDUCE_ADD: return "vecreduce_add";
case ISD::VECREDUCE_MUL: return "vecreduce_mul";
case ISD::VECREDUCE_AND: return "vecreduce_and";
@@ -401,6 +423,32 @@ static Printable PrintNodeId(const SDNode &Node) {
});
}
+// Print the MMO with more information from the SelectionDAG.
+static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO,
+ const MachineFunction *MF, const Module *M,
+ const MachineFrameInfo *MFI,
+ const TargetInstrInfo *TII, LLVMContext &Ctx) {
+ ModuleSlotTracker MST(M);
+ if (MF)
+ MST.incorporateFunction(MF->getFunction());
+ SmallVector<StringRef, 0> SSNs;
+ MMO.print(OS, MST, SSNs, Ctx, MFI, TII);
+}
+
+static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO,
+ const SelectionDAG *G) {
+ if (G) {
+ const MachineFunction *MF = &G->getMachineFunction();
+ return printMemOperand(OS, MMO, MF, MF->getFunction().getParent(),
+ &MF->getFrameInfo(), G->getSubtarget().getInstrInfo(),
+ *G->getContext());
+ } else {
+ LLVMContext Ctx;
+ return printMemOperand(OS, MMO, /*MF=*/nullptr, /*M=*/nullptr,
+ /*MFI=*/nullptr, /*TII=*/nullptr, Ctx);
+ }
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void SDNode::dump() const { dump(nullptr); }
@@ -430,9 +478,6 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
if (getFlags().hasExact())
OS << " exact";
- if (getFlags().hasUnsafeAlgebra())
- OS << " unsafe";
-
if (getFlags().hasNoNaNs())
OS << " nnan";
@@ -448,6 +493,12 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
if (getFlags().hasAllowContract())
OS << " contract";
+ if (getFlags().hasApproximateFuncs())
+ OS << " afn";
+
+ if (getFlags().hasAllowReassociation())
+ OS << " reassoc";
+
if (getFlags().hasVectorReduction())
OS << " vector-reduction";
@@ -457,7 +508,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << "Mem:";
for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(),
e = MN->memoperands_end(); i != e; ++i) {
- OS << **i;
+ printMemOperand(OS, **i, G);
if (std::next(i) != e)
OS << " ";
}
@@ -549,7 +600,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << ":" << N->getVT().getEVTString();
}
else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) {
- OS << "<" << *LD->getMemOperand();
+ OS << "<";
+
+ printMemOperand(OS, *LD->getMemOperand(), G);
bool doExt = true;
switch (LD->getExtensionType()) {
@@ -567,7 +620,8 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << ">";
} else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) {
- OS << "<" << *ST->getMemOperand();
+ OS << "<";
+ printMemOperand(OS, *ST->getMemOperand(), G);
if (ST->isTruncatingStore())
OS << ", trunc to " << ST->getMemoryVT().getEVTString();
@@ -578,7 +632,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << ">";
} else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) {
- OS << "<" << *M->getMemOperand() << ">";
+ OS << "<";
+ printMemOperand(OS, *M->getMemOperand(), G);
+ OS << ">";
} else if (const BlockAddressSDNode *BA =
dyn_cast<BlockAddressSDNode>(this)) {
int64_t offset = BA->getOffset();
@@ -608,6 +664,8 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
if (getNodeId() != -1)
OS << " [ID=" << getNodeId() << ']';
+ if (!(isa<ConstantSDNode>(this) || (isa<ConstantFPSDNode>(this))))
+ OS << "# D:" << isDivergent();
if (!G)
return;
@@ -779,4 +837,8 @@ void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
if (i) OS << ", "; else OS << " ";
printOperand(OS, G, getOperand(i));
}
+ if (DebugLoc DL = getDebugLoc()) {
+ OS << ", ";
+ DL.print(OS);
+ }
}
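The dumper changes above route every memory-operand print through a pair of printMemOperand overloads: one that takes the machine function, module, frame info and instruction info explicitly, and a convenience overload that derives that context from the SelectionDAG when one is available and otherwise falls back to a context-free print. A stand-alone sketch of that overload pattern, using invented types (Module, Function, Graph) rather than the LLVM classes:

    #include <iostream>
    #include <string>

    // Illustrative stand-ins for the module/function/graph context.
    struct Module   { std::string Name; };
    struct Function { std::string Name; Module *Parent; };
    struct Graph    { Function *F; };

    // Full-context printer: every piece of information is passed explicitly.
    static void printMemRef(std::ostream &OS, int Slot, const Function *F,
                            const Module *M) {
      if (M) OS << M->Name << ':';
      if (F) OS << F->Name << ':';
      OS << "mem#" << Slot;
    }

    // Convenience overload: derive the context from the graph when it is
    // available, otherwise degrade to a context-free rendering.
    static void printMemRef(std::ostream &OS, int Slot, const Graph *G) {
      if (G)
        printMemRef(OS, Slot, G->F, G->F->Parent);
      else
        printMemRef(OS, Slot, nullptr, nullptr);
    }

    int main() {
      Module M{"demo"};
      Function F{"main", &M};
      Graph G{&F};
      printMemRef(std::cout, 0, &G);      // demo:main:mem#0
      std::cout << '\n';
      printMemRef(std::cout, 0, nullptr); // mem#0
      std::cout << '\n';
      return 0;
    }

Keeping the fallback inside the convenience overload means callers that only have a memory operand still get readable output, just without names resolved against the enclosing function and module.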
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index bd9fcfb5c1e8..f7bd8847bee3 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -29,6 +29,7 @@
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
@@ -43,7 +44,6 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -82,6 +82,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
+#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
@@ -196,7 +197,7 @@ defaultListDAGScheduler("default", "Best scheduler for the target",
namespace llvm {
//===--------------------------------------------------------------------===//
- /// \brief This class is used by SelectionDAGISel to temporarily override
+ /// This class is used by SelectionDAGISel to temporarily override
/// the optimization level on a per-function basis.
class OptLevelChanger {
SelectionDAGISel &IS;
@@ -211,26 +212,27 @@ namespace llvm {
return;
IS.OptLevel = NewOptLevel;
IS.TM.setOptLevel(NewOptLevel);
- DEBUG(dbgs() << "\nChanging optimization level for Function "
- << IS.MF->getFunction().getName() << "\n");
- DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel
- << " ; After: -O" << NewOptLevel << "\n");
+ LLVM_DEBUG(dbgs() << "\nChanging optimization level for Function "
+ << IS.MF->getFunction().getName() << "\n");
+ LLVM_DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel << " ; After: -O"
+ << NewOptLevel << "\n");
SavedFastISel = IS.TM.Options.EnableFastISel;
if (NewOptLevel == CodeGenOpt::None) {
IS.TM.setFastISel(IS.TM.getO0WantsFastISel());
- DEBUG(dbgs() << "\tFastISel is "
- << (IS.TM.Options.EnableFastISel ? "enabled" : "disabled")
- << "\n");
+ LLVM_DEBUG(
+ dbgs() << "\tFastISel is "
+ << (IS.TM.Options.EnableFastISel ? "enabled" : "disabled")
+ << "\n");
}
}
~OptLevelChanger() {
if (IS.OptLevel == SavedOptLevel)
return;
- DEBUG(dbgs() << "\nRestoring optimization level for Function "
- << IS.MF->getFunction().getName() << "\n");
- DEBUG(dbgs() << "\tBefore: -O" << IS.OptLevel
- << " ; After: -O" << SavedOptLevel << "\n");
+ LLVM_DEBUG(dbgs() << "\nRestoring optimization level for Function "
+ << IS.MF->getFunction().getName() << "\n");
+ LLVM_DEBUG(dbgs() << "\tBefore: -O" << IS.OptLevel << " ; After: -O"
+ << SavedOptLevel << "\n");
IS.OptLevel = SavedOptLevel;
IS.TM.setOptLevel(SavedOptLevel);
IS.TM.setFastISel(SavedFastISel);
@@ -326,9 +328,9 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<GCModuleInfo>();
AU.addRequired<StackProtector>();
- AU.addPreserved<StackProtector>();
AU.addPreserved<GCModuleInfo>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
if (UseMBPI && OptLevel != CodeGenOpt::None)
AU.addRequired<BranchProbabilityInfoWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -410,11 +412,12 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
- DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
+ LLVM_DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI);
- CurDAG->init(*MF, *ORE, this);
+ CurDAG->init(*MF, *ORE, this, LibInfo,
+ getAnalysisIfAvailable<DivergenceAnalysis>());
FuncInfo->set(Fn, *MF, CurDAG);
// Now get the optional analyzes if we want to.
@@ -513,8 +516,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// FIXME: VR def may not be in entry block.
Def->getParent()->insert(std::next(InsertPos), MI);
} else
- DEBUG(dbgs() << "Dropping debug info for dead vreg"
- << TargetRegisterInfo::virtReg2Index(Reg) << "\n");
+ LLVM_DEBUG(dbgs() << "Dropping debug info for dead vreg"
+ << TargetRegisterInfo::virtReg2Index(Reg) << "\n");
}
// If Reg is live-in then update debug info to track its copy in a vreg.
@@ -621,8 +624,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// at this point.
FuncInfo->clear();
- DEBUG(dbgs() << "*** MachineFunction at end of ISel ***\n");
- DEBUG(MF->print(dbgs()));
+ LLVM_DEBUG(dbgs() << "*** MachineFunction at end of ISel ***\n");
+ LLVM_DEBUG(MF->print(dbgs()));
return true;
}
@@ -711,6 +714,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
int BlockNumber = -1;
(void)BlockNumber;
bool MatchFilterBB = false; (void)MatchFilterBB;
+ TargetTransformInfo &TTI =
+ getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*FuncInfo->Fn);
// Pre-type legalization allow creation of any node types.
CurDAG->NewNodesMustHaveLegalTypes = false;
@@ -718,7 +723,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
MatchFilterBB = (FilterDAGBasicBlockName.empty() ||
FilterDAGBasicBlockName ==
- FuncInfo->MBB->getBasicBlock()->getName().str());
+ FuncInfo->MBB->getBasicBlock()->getName());
#endif
#ifdef NDEBUG
if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs ||
@@ -730,9 +735,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
BlockName =
(MF->getName() + ":" + FuncInfo->MBB->getBasicBlock()->getName()).str();
}
- DEBUG(dbgs() << "Initial selection DAG: " << printMBBReference(*FuncInfo->MBB)
- << " '" << BlockName << "'\n";
- CurDAG->dump());
+ LLVM_DEBUG(dbgs() << "Initial selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
if (ViewDAGCombine1 && MatchFilterBB)
CurDAG->viewGraph("dag-combine1 input for " + BlockName);
@@ -744,10 +750,13 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel);
}
- DEBUG(dbgs() << "Optimized lowered selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+
+ LLVM_DEBUG(dbgs() << "Optimized lowered selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
// Second step, hack on the DAG until it only uses operations and types that
// the target supports.
@@ -761,10 +770,13 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
Changed = CurDAG->LegalizeTypes();
}
- DEBUG(dbgs() << "Type-legalized selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+
+ LLVM_DEBUG(dbgs() << "Type-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
// Only allow creation of legal node types.
CurDAG->NewNodesMustHaveLegalTypes = true;
@@ -780,10 +792,13 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel);
}
- DEBUG(dbgs() << "Optimized type-legalized selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+
+ LLVM_DEBUG(dbgs() << "Optimized type-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
}
{
@@ -793,10 +808,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
}
if (Changed) {
- DEBUG(dbgs() << "Vector-legalized selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ LLVM_DEBUG(dbgs() << "Vector-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
{
NamedRegionTimer T("legalize_types2", "Type Legalization 2", GroupName,
@@ -804,10 +819,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->LegalizeTypes();
}
- DEBUG(dbgs() << "Vector/type-legalized selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ LLVM_DEBUG(dbgs() << "Vector/type-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
if (ViewDAGCombineLT && MatchFilterBB)
CurDAG->viewGraph("dag-combine-lv input for " + BlockName);
@@ -819,10 +834,13 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(AfterLegalizeVectorOps, AA, OptLevel);
}
- DEBUG(dbgs() << "Optimized vector-legalized selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ LLVM_DEBUG(dbgs() << "Optimized vector-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
+
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
}
if (ViewLegalizeDAGs && MatchFilterBB)
@@ -834,10 +852,13 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Legalize();
}
- DEBUG(dbgs() << "Legalized selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+
+ LLVM_DEBUG(dbgs() << "Legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
if (ViewDAGCombine2 && MatchFilterBB)
CurDAG->viewGraph("dag-combine2 input for " + BlockName);
@@ -849,10 +870,13 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel);
}
- DEBUG(dbgs() << "Optimized legalized selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+
+ LLVM_DEBUG(dbgs() << "Optimized legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
if (OptLevel != CodeGenOpt::None)
ComputeLiveOutVRegInfo();
@@ -868,10 +892,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
DoInstructionSelection();
}
- DEBUG(dbgs() << "Selected selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ LLVM_DEBUG(dbgs() << "Selected selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
if (ViewSchedDAGs && MatchFilterBB)
CurDAG->viewGraph("scheduler input for " + BlockName);
@@ -937,10 +961,62 @@ public:
} // end anonymous namespace
+// This function is used to enforce the topological node id property
+// leveraged during instruction selection. Before selection, all nodes are
+// given a non-negative id such that all nodes have a larger id than their
+// operands. As this holds transitively, we can prune checks that a node N is
+// a predecessor of another node M by not recursively checking through M's
+// operands if N's id is larger than M's id. This significantly improves the
+// performance of various legality checks (e.g. IsLegalToFold / UpdateChains).
+
+// However, when we fuse multiple nodes into a single node during selection,
+// we may induce a predecessor relationship between the inputs and outputs of
+// the distinct nodes being merged, violating the topological property.
+// Should a fused node have a successor which has yet to be selected, our
+// legality checks would be incorrect. To avoid this we mark all unselected
+// successor nodes (i.e. those with id != -1) as invalid for pruning by
+// bit-negating their ids (x => -(x + 1)) and modify the pruning check to
+// ignore negative ids. We use bit-negation to make clear that node id -1 can
+// only be reached by selected nodes. As the conversion is reversible, the
+// original id can be recovered and topological pruning can still be leveraged
+// when looking for unselected nodes. This method is called internally in all
+// ISel replacement calls.
+void SelectionDAGISel::EnforceNodeIdInvariant(SDNode *Node) {
+ SmallVector<SDNode *, 4> Nodes;
+ Nodes.push_back(Node);
+
+ while (!Nodes.empty()) {
+ SDNode *N = Nodes.pop_back_val();
+ for (auto *U : N->uses()) {
+ auto UId = U->getNodeId();
+ if (UId > 0) {
+ InvalidateNodeId(U);
+ Nodes.push_back(U);
+ }
+ }
+ }
+}
+
+// InvalidateNodeId - As discussed in EnforceNodeIdInvariant, mark a node's
+// id with the equivalent negated id, which is invalid for topological
+// pruning.
+void SelectionDAGISel::InvalidateNodeId(SDNode *N) {
+ int InvalidId = -(N->getNodeId() + 1);
+ N->setNodeId(InvalidId);
+}
+
+// getUninvalidatedNodeId - get original uninvalidated node id.
+int SelectionDAGISel::getUninvalidatedNodeId(SDNode *N) {
+ int Id = N->getNodeId();
+ if (Id < -1)
+ return -(Id + 1);
+ return Id;
+}
+
void SelectionDAGISel::DoInstructionSelection() {
- DEBUG(dbgs() << "===== Instruction selection begins: "
- << printMBBReference(*FuncInfo->MBB) << " '"
- << FuncInfo->MBB->getName() << "'\n");
+ LLVM_DEBUG(dbgs() << "===== Instruction selection begins: "
+ << printMBBReference(*FuncInfo->MBB) << " '"
+ << FuncInfo->MBB->getName() << "'\n");
PreprocessISelDAG();
@@ -972,6 +1048,33 @@ void SelectionDAGISel::DoInstructionSelection() {
if (Node->use_empty())
continue;
+#ifndef NDEBUG
+ SmallVector<SDNode *, 4> Nodes;
+ Nodes.push_back(Node);
+
+ while (!Nodes.empty()) {
+ auto N = Nodes.pop_back_val();
+ if (N->getOpcode() == ISD::TokenFactor || N->getNodeId() < 0)
+ continue;
+ for (const SDValue &Op : N->op_values()) {
+ if (Op->getOpcode() == ISD::TokenFactor)
+ Nodes.push_back(Op.getNode());
+ else {
+ // We rely on the topological ordering of node ids to check for cycles
+ // when fusing nodes during selection. All unselected successors of an
+ // already selected node should have a negative id. This assertion will
+ // catch such cases. If it triggers, you are likely using DAG-level
+ // Value/Node replacement functions (rather than the equivalent ISel
+ // replacements) in backend-specific selections. See the comment in
+ // EnforceNodeIdInvariant for more details.
+ assert(Op->getNodeId() != -1 &&
+ "Node has already selected predecessor node");
+ }
+ }
+ }
+#endif
+
// When we are using non-default rounding modes or FP exception behavior
// FP operations are represented by StrictFP pseudo-operations. They
// need to be simplified here so that the target-specific instruction
@@ -985,13 +1088,16 @@ void SelectionDAGISel::DoInstructionSelection() {
if (Node->isStrictFPOpcode())
Node = CurDAG->mutateStrictFPToFP(Node);
+ LLVM_DEBUG(dbgs() << "\nISEL: Starting selection on root node: ";
+ Node->dump(CurDAG));
+
Select(Node);
}
CurDAG->setRoot(Dummy.getValue());
}
- DEBUG(dbgs() << "===== Instruction selection ends:\n");
+ LLVM_DEBUG(dbgs() << "\n===== Instruction selection ends:\n");
PostprocessISelDAG();
}
@@ -1264,7 +1370,7 @@ static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) {
}
auto DLoc = isa<Instruction>(SwiftErrorVal)
- ? dyn_cast<Instruction>(SwiftErrorVal)->getDebugLoc()
+ ? cast<Instruction>(SwiftErrorVal)->getDebugLoc()
: DebugLoc();
const auto *TII = FuncInfo->MF->getSubtarget().getInstrInfo();
@@ -1381,7 +1487,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Initialize the Fast-ISel state, if needed.
FastISel *FastIS = nullptr;
if (TM.Options.EnableFastISel) {
- DEBUG(dbgs() << "Enabling fast-isel\n");
+ LLVM_DEBUG(dbgs() << "Enabling fast-isel\n");
FastIS = TLI->createFastISel(*FuncInfo, LibInfo);
}
@@ -1398,6 +1504,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FuncInfo->MBB = FuncInfo->MBBMap[&Fn.getEntryBlock()];
FuncInfo->InsertPt = FuncInfo->MBB->begin();
+ CurDAG->setFunctionLoweringInfo(FuncInfo);
+
if (!FastIS) {
LowerArguments(Fn);
} else {
@@ -1435,6 +1543,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
processDbgDeclares(FuncInfo);
// Iterate over all basic blocks in the function.
+ StackProtector &SP = getAnalysis<StackProtector>();
for (const BasicBlock *LLVMBB : RPOT) {
if (OptLevel != CodeGenOpt::None) {
bool AllPredsVisited = true;
@@ -1604,7 +1713,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FastIS->recomputeInsertPt();
}
- if (getAnalysis<StackProtector>().shouldEmitSDCheck(*LLVMBB)) {
+ if (SP.shouldEmitSDCheck(*LLVMBB)) {
bool FunctionBasedInstrumentation =
TLI->getSSPStackGuardCheck(*Fn.getParent());
SDB->SPDescriptor.initialize(LLVMBB, FuncInfo->MBBMap[LLVMBB],
@@ -1630,11 +1739,15 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FastIS->removeDeadCode(FuncInfo->InsertPt, FuncInfo->MBB->end());
}
+ if (FastIS)
+ FastIS->finishBasicBlock();
FinishBasicBlock();
FuncInfo->PHINodesToUpdate.clear();
ElidedArgCopyInstrs.clear();
}
+ SP.copyToMachineFrameInfo(MF->getFrameInfo());
+
propagateSwiftErrorVRegs(FuncInfo);
delete FastIS;
@@ -1728,12 +1841,12 @@ FindSplitPointForStackProtector(MachineBasicBlock *BB) {
void
SelectionDAGISel::FinishBasicBlock() {
- DEBUG(dbgs() << "Total amount of phi nodes to update: "
- << FuncInfo->PHINodesToUpdate.size() << "\n";
- for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i)
- dbgs() << "Node " << i << " : ("
- << FuncInfo->PHINodesToUpdate[i].first
- << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n");
+ LLVM_DEBUG(dbgs() << "Total amount of phi nodes to update: "
+ << FuncInfo->PHINodesToUpdate.size() << "\n";
+ for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e;
+ ++i) dbgs()
+ << "Node " << i << " : (" << FuncInfo->PHINodesToUpdate[i].first
+ << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n");
// Next, now that we know what the last MBB the LLVM BB expanded is, update
// PHI nodes in successors.
@@ -2012,7 +2125,7 @@ bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS,
return true;
// If the actual AND mask is allowing unallowed bits, this doesn't match.
- if (ActualMask.intersects(~DesiredMask))
+ if (!ActualMask.isSubsetOf(DesiredMask))
return false;
// Otherwise, the DAG Combiner may have proven that the value coming in is
@@ -2041,7 +2154,7 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
return true;
// If the actual AND mask is allowing unallowed bits, this doesn't match.
- if (ActualMask.intersects(~DesiredMask))
+ if (!ActualMask.isSubsetOf(DesiredMask))
return false;
// Otherwise, the DAG Combiner may have proven that the value coming in is
@@ -2134,52 +2247,44 @@ static SDNode *findGlueUse(SDNode *N) {
return nullptr;
}
-/// findNonImmUse - Return true if "Use" is a non-immediate use of "Def".
-/// This function iteratively traverses up the operand chain, ignoring
-/// certain nodes.
-static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
- SDNode *Root, SmallPtrSetImpl<SDNode*> &Visited,
+/// findNonImmUse - Return true if "Def" is a predecessor of "Root" via a path
+/// beyond "ImmedUse". We may ignore chains as they are checked separately.
+static bool findNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse,
bool IgnoreChains) {
- // The NodeID's are given uniques ID's where a node ID is guaranteed to be
- // greater than all of its (recursive) operands. If we scan to a point where
- // 'use' is smaller than the node we're scanning for, then we know we will
- // never find it.
- //
- // The Use may be -1 (unassigned) if it is a newly allocated node. This can
- // happen because we scan down to newly selected nodes in the case of glue
- // uses.
- std::vector<SDNode *> WorkList;
- WorkList.push_back(Use);
-
- while (!WorkList.empty()) {
- Use = WorkList.back();
- WorkList.pop_back();
- if (Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1)
- continue;
+ SmallPtrSet<const SDNode *, 16> Visited;
+ SmallVector<const SDNode *, 16> WorkList;
+ // Only check if we have non-immediate uses of Def.
+ if (ImmedUse->isOnlyUserOf(Def))
+ return false;
- // Don't revisit nodes if we already scanned it and didn't fail, we know we
- // won't fail if we scan it again.
- if (!Visited.insert(Use).second)
+ // We don't care about paths to Def that go through ImmedUse so mark it
+ // visited and mark non-def operands as used.
+ Visited.insert(ImmedUse);
+ for (const SDValue &Op : ImmedUse->op_values()) {
+ SDNode *N = Op.getNode();
+ // Ignore chain deps (they are validated by
+ // HandleMergeInputChains) and immediate uses
+ if ((Op.getValueType() == MVT::Other && IgnoreChains) || N == Def)
continue;
+ if (!Visited.insert(N).second)
+ continue;
+ WorkList.push_back(N);
+ }
- for (const SDValue &Op : Use->op_values()) {
- // Ignore chain uses, they are validated by HandleMergeInputChains.
- if (Op.getValueType() == MVT::Other && IgnoreChains)
- continue;
-
+ // Initialize worklist to operands of Root.
+ if (Root != ImmedUse) {
+ for (const SDValue &Op : Root->op_values()) {
SDNode *N = Op.getNode();
- if (N == Def) {
- if (Use == ImmedUse || Use == Root)
- continue; // We are not looking for immediate use.
- assert(N != Root);
- return true;
- }
-
- // Traverse up the operand chain.
+ // Ignore chains (they are validated by HandleMergeInputChains)
+ if ((Op.getValueType() == MVT::Other && IgnoreChains) || N == Def)
+ continue;
+ if (!Visited.insert(N).second)
+ continue;
WorkList.push_back(N);
}
}
- return false;
+
+ return SDNode::hasPredecessorHelper(Def, Visited, WorkList, 0, true);
}
/// IsProfitableToFold - Returns true if it's profitable to fold the specific
@@ -2199,7 +2304,7 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
// If Root use can somehow reach N through a path that doesn't contain
// U then folding N would create a cycle. e.g. In the following
- // diagram, Root can reach N through X. If N is folded into into Root, then
+ // diagram, Root can reach N through X. If N is folded into Root, then
// X is both a predecessor and a successor of U.
//
// [N*] //
@@ -2251,13 +2356,12 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
// If our query node has a glue result with a use, we've walked up it. If
// the user (which has already been selected) has a chain or indirectly uses
- // the chain, our WalkChainUsers predicate will not consider it. Because of
+ // the chain, HandleMergeInputChains will not consider it. Because of
// this, we cannot ignore chains in this predicate.
IgnoreChains = false;
}
- SmallPtrSet<SDNode*, 16> Visited;
- return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains);
+ return !findNonImmUse(Root, N.getNode(), U, IgnoreChains);
}
void SelectionDAGISel::Select_INLINEASM(SDNode *N) {
@@ -2360,7 +2464,8 @@ void SelectionDAGISel::UpdateChains(
std::replace(ChainNodesMatched.begin(), ChainNodesMatched.end(), N,
static_cast<SDNode *>(nullptr));
});
- CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain);
+ if (ChainNode->getOpcode() != ISD::TokenFactor)
+ ReplaceUses(ChainVal, InputChain);
// If the node became dead and we haven't already seen it, delete it.
if (ChainNode != NodeToMatch && ChainNode->use_empty() &&
@@ -2372,144 +2477,7 @@ void SelectionDAGISel::UpdateChains(
if (!NowDeadNodes.empty())
CurDAG->RemoveDeadNodes(NowDeadNodes);
- DEBUG(dbgs() << "ISEL: Match complete!\n");
-}
-
-enum ChainResult {
- CR_Simple,
- CR_InducesCycle,
- CR_LeadsToInteriorNode
-};
-
-/// WalkChainUsers - Walk down the users of the specified chained node that is
-/// part of the pattern we're matching, looking at all of the users we find.
-/// This determines whether something is an interior node, whether we have a
-/// non-pattern node in between two pattern nodes (which prevent folding because
-/// it would induce a cycle) and whether we have a TokenFactor node sandwiched
-/// between pattern nodes (in which case the TF becomes part of the pattern).
-///
-/// The walk we do here is guaranteed to be small because we quickly get down to
-/// already selected nodes "below" us.
-static ChainResult
-WalkChainUsers(const SDNode *ChainedNode,
- SmallVectorImpl<SDNode *> &ChainedNodesInPattern,
- DenseMap<const SDNode *, ChainResult> &TokenFactorResult,
- SmallVectorImpl<SDNode *> &InteriorChainedNodes) {
- ChainResult Result = CR_Simple;
-
- for (SDNode::use_iterator UI = ChainedNode->use_begin(),
- E = ChainedNode->use_end(); UI != E; ++UI) {
- // Make sure the use is of the chain, not some other value we produce.
- if (UI.getUse().getValueType() != MVT::Other) continue;
-
- SDNode *User = *UI;
-
- if (User->getOpcode() == ISD::HANDLENODE) // Root of the graph.
- continue;
-
- // If we see an already-selected machine node, then we've gone beyond the
- // pattern that we're selecting down into the already selected chunk of the
- // DAG.
- unsigned UserOpcode = User->getOpcode();
- if (User->isMachineOpcode() ||
- UserOpcode == ISD::CopyToReg ||
- UserOpcode == ISD::CopyFromReg ||
- UserOpcode == ISD::INLINEASM ||
- UserOpcode == ISD::EH_LABEL ||
- UserOpcode == ISD::LIFETIME_START ||
- UserOpcode == ISD::LIFETIME_END) {
- // If their node ID got reset to -1 then they've already been selected.
- // Treat them like a MachineOpcode.
- if (User->getNodeId() == -1)
- continue;
- }
-
- // If we have a TokenFactor, we handle it specially.
- if (User->getOpcode() != ISD::TokenFactor) {
- // If the node isn't a token factor and isn't part of our pattern, then it
- // must be a random chained node in between two nodes we're selecting.
- // This happens when we have something like:
- // x = load ptr
- // call
- // y = x+4
- // store y -> ptr
- // Because we structurally match the load/store as a read/modify/write,
- // but the call is chained between them. We cannot fold in this case
- // because it would induce a cycle in the graph.
- if (!std::count(ChainedNodesInPattern.begin(),
- ChainedNodesInPattern.end(), User))
- return CR_InducesCycle;
-
- // Otherwise we found a node that is part of our pattern. For example in:
- // x = load ptr
- // y = x+4
- // store y -> ptr
- // This would happen when we're scanning down from the load and see the
- // store as a user. Record that there is a use of ChainedNode that is
- // part of the pattern and keep scanning uses.
- Result = CR_LeadsToInteriorNode;
- InteriorChainedNodes.push_back(User);
- continue;
- }
-
- // If we found a TokenFactor, there are two cases to consider: first if the
- // TokenFactor is just hanging "below" the pattern we're matching (i.e. no
- // uses of the TF are in our pattern) we just want to ignore it. Second,
- // the TokenFactor can be sandwiched in between two chained nodes, like so:
- // [Load chain]
- // ^
- // |
- // [Load]
- // ^ ^
- // | \ DAG's like cheese
- // / \ do you?
- // / |
- // [TokenFactor] [Op]
- // ^ ^
- // | |
- // \ /
- // \ /
- // [Store]
- //
- // In this case, the TokenFactor becomes part of our match and we rewrite it
- // as a new TokenFactor.
- //
- // To distinguish these two cases, do a recursive walk down the uses.
- auto MemoizeResult = TokenFactorResult.find(User);
- bool Visited = MemoizeResult != TokenFactorResult.end();
- // Recursively walk chain users only if the result is not memoized.
- if (!Visited) {
- auto Res = WalkChainUsers(User, ChainedNodesInPattern, TokenFactorResult,
- InteriorChainedNodes);
- MemoizeResult = TokenFactorResult.insert(std::make_pair(User, Res)).first;
- }
- switch (MemoizeResult->second) {
- case CR_Simple:
- // If the uses of the TokenFactor are just already-selected nodes, ignore
- // it, it is "below" our pattern.
- continue;
- case CR_InducesCycle:
- // If the uses of the TokenFactor lead to nodes that are not part of our
- // pattern that are not selected, folding would turn this into a cycle,
- // bail out now.
- return CR_InducesCycle;
- case CR_LeadsToInteriorNode:
- break; // Otherwise, keep processing.
- }
-
- // Okay, we know we're in the interesting interior case. The TokenFactor
- // is now going to be considered part of the pattern so that we rewrite its
- // uses (it may have uses that are not part of the pattern) with the
- // ultimate chain result of the generated code. We will also add its chain
- // inputs as inputs to the ultimate TokenFactor we create.
- Result = CR_LeadsToInteriorNode;
- if (!Visited) {
- ChainedNodesInPattern.push_back(User);
- InteriorChainedNodes.push_back(User);
- }
- }
-
- return Result;
+ LLVM_DEBUG(dbgs() << "ISEL: Match complete!\n");
}
/// HandleMergeInputChains - This implements the OPC_EmitMergeInputChains
@@ -2521,47 +2489,56 @@ WalkChainUsers(const SDNode *ChainedNode,
static SDValue
HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
SelectionDAG *CurDAG) {
- // Used for memoization. Without it WalkChainUsers could take exponential
- // time to run.
- DenseMap<const SDNode *, ChainResult> TokenFactorResult;
- // Walk all of the chained nodes we've matched, recursively scanning down the
- // users of the chain result. This adds any TokenFactor nodes that are caught
- // in between chained nodes to the chained and interior nodes list.
- SmallVector<SDNode*, 3> InteriorChainedNodes;
- for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
- if (WalkChainUsers(ChainNodesMatched[i], ChainNodesMatched,
- TokenFactorResult,
- InteriorChainedNodes) == CR_InducesCycle)
- return SDValue(); // Would induce a cycle.
- }
- // Okay, we have walked all the matched nodes and collected TokenFactor nodes
- // that we are interested in. Form our input TokenFactor node.
+ SmallPtrSet<const SDNode *, 16> Visited;
+ SmallVector<const SDNode *, 8> Worklist;
SmallVector<SDValue, 3> InputChains;
- for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
- // Add the input chain of this node to the InputChains list (which will be
- // the operands of the generated TokenFactor) if it's not an interior node.
- SDNode *N = ChainNodesMatched[i];
- if (N->getOpcode() != ISD::TokenFactor) {
- if (std::count(InteriorChainedNodes.begin(),InteriorChainedNodes.end(),N))
- continue;
+ unsigned int Max = 8192;
- // Otherwise, add the input chain.
- SDValue InChain = ChainNodesMatched[i]->getOperand(0);
- assert(InChain.getValueType() == MVT::Other && "Not a chain");
- InputChains.push_back(InChain);
- continue;
- }
+ // Quick exit on trivial merge.
+ if (ChainNodesMatched.size() == 1)
+ return ChainNodesMatched[0]->getOperand(0);
- // If we have a token factor, we want to add all inputs of the token factor
- // that are not part of the pattern we're matching.
- for (const SDValue &Op : N->op_values()) {
- if (!std::count(ChainNodesMatched.begin(), ChainNodesMatched.end(),
- Op.getNode()))
- InputChains.push_back(Op);
- }
+ // Add chains that aren't already added (internal). Peek through
+ // token factors.
+ std::function<void(const SDValue)> AddChains = [&](const SDValue V) {
+ if (V.getValueType() != MVT::Other)
+ return;
+ if (V->getOpcode() == ISD::EntryToken)
+ return;
+ if (!Visited.insert(V.getNode()).second)
+ return;
+ if (V->getOpcode() == ISD::TokenFactor) {
+ for (const SDValue &Op : V->op_values())
+ AddChains(Op);
+ } else
+ InputChains.push_back(V);
+ };
+
+ for (auto *N : ChainNodesMatched) {
+ Worklist.push_back(N);
+ Visited.insert(N);
}
+ while (!Worklist.empty())
+ AddChains(Worklist.pop_back_val()->getOperand(0));
+
+ // Skip the search if there are no chain dependencies.
+ if (InputChains.size() == 0)
+ return CurDAG->getEntryNode();
+
+ // If one of these chains is a successor of input, we must have a
+ // node that is both the predecessor and successor of the
+ // to-be-merged nodes. Fail.
+ Visited.clear();
+ for (SDValue V : InputChains)
+ Worklist.push_back(V.getNode());
+
+ for (auto *N : ChainNodesMatched)
+ if (SDNode::hasPredecessorHelper(N, Visited, Worklist, Max, true))
+ return SDValue();
+
+ // Return merged chain.
if (InputChains.size() == 1)
return InputChains[0];
return CurDAG->getNode(ISD::TokenFactor, SDLoc(ChainNodesMatched[0]),
@@ -2606,8 +2583,8 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
// Move the glue if needed.
if ((EmitNodeInfo & OPFL_GlueOutput) && OldGlueResultNo != -1 &&
(unsigned)OldGlueResultNo != ResNumResults-1)
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldGlueResultNo),
- SDValue(Res, ResNumResults-1));
+ ReplaceUses(SDValue(Node, OldGlueResultNo),
+ SDValue(Res, ResNumResults - 1));
if ((EmitNodeInfo & OPFL_GlueOutput) != 0)
--ResNumResults;
@@ -2615,14 +2592,15 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
// Move the chain reference if needed.
if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 &&
(unsigned)OldChainResultNo != ResNumResults-1)
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo),
- SDValue(Res, ResNumResults-1));
+ ReplaceUses(SDValue(Node, OldChainResultNo),
+ SDValue(Res, ResNumResults - 1));
// Otherwise, no replacement happened because the node already exists. Replace
// Uses of the old node with the new one.
if (Res != Node) {
- CurDAG->ReplaceAllUsesWith(Node, Res);
- CurDAG->RemoveDeadNode(Node);
+ ReplaceNode(Node, Res);
+ } else {
+ EnforceNodeIdInvariant(Res);
}
return Res;
@@ -2861,7 +2839,7 @@ struct MatchScope {
bool HasChainNodesMatched;
};
-/// \brief A DAG update listener to keep the matching state
+/// A DAG update listener to keep the matching state
/// (i.e. RecordedNodes and MatchScope) uptodate if the target is allowed to
/// change the DAG while matching. X86 addressing mode matcher is an example
/// for this.
@@ -2939,8 +2917,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
return;
case ISD::AssertSext:
case ISD::AssertZext:
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, 0),
- NodeToMatch->getOperand(0));
+ ReplaceUses(SDValue(NodeToMatch, 0), NodeToMatch->getOperand(0));
CurDAG->RemoveDeadNode(NodeToMatch);
return;
case ISD::INLINEASM:
@@ -2988,9 +2965,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
// update the chain results when the pattern is complete.
SmallVector<SDNode*, 3> ChainNodesMatched;
- DEBUG(dbgs() << "ISEL: Starting pattern match on root node: ";
- NodeToMatch->dump(CurDAG);
- dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << "ISEL: Starting pattern match\n");
// Determine where to start the interpreter. Normally we start at opcode #0,
// but if the state machine starts with an OPC_SwitchOpcode, then we
@@ -3002,7 +2977,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
// Already computed the OpcodeOffset table, just index into it.
if (N.getOpcode() < OpcodeOffset.size())
MatcherIndex = OpcodeOffset[N.getOpcode()];
- DEBUG(dbgs() << " Initial Opcode index to " << MatcherIndex << "\n");
+ LLVM_DEBUG(dbgs() << " Initial Opcode index to " << MatcherIndex << "\n");
} else if (MatcherTable[0] == OPC_SwitchOpcode) {
// Otherwise, the table isn't computed, but the state machine does start
@@ -3069,9 +3044,10 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
if (!Result)
break;
- DEBUG(dbgs() << " Skipped scope entry (due to false predicate) at "
- << "index " << MatcherIndexOfPredicate
- << ", continuing at " << FailIndex << "\n");
+ LLVM_DEBUG(
+ dbgs() << " Skipped scope entry (due to false predicate) at "
+ << "index " << MatcherIndexOfPredicate << ", continuing at "
+ << FailIndex << "\n");
++NumDAGIselRetries;
// Otherwise, we know that this case of the Scope is guaranteed to fail,
@@ -3120,11 +3096,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
if (auto *MN = dyn_cast<MemSDNode>(N))
MatchedMemRefs.push_back(MN->getMemOperand());
else {
- DEBUG(
- dbgs() << "Expected MemSDNode ";
- N->dump(CurDAG);
- dbgs() << '\n'
- );
+ LLVM_DEBUG(dbgs() << "Expected MemSDNode "; N->dump(CurDAG);
+ dbgs() << '\n');
}
continue;
@@ -3245,8 +3218,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
if (CaseSize == 0) break;
// Otherwise, execute the case we found.
- DEBUG(dbgs() << " OpcodeSwitch from " << SwitchStart
- << " to " << MatcherIndex << "\n");
+ LLVM_DEBUG(dbgs() << " OpcodeSwitch from " << SwitchStart << " to "
+ << MatcherIndex << "\n");
continue;
}
@@ -3277,8 +3250,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
if (CaseSize == 0) break;
// Otherwise, execute the case we found.
- DEBUG(dbgs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString()
- << "] from " << SwitchStart << " to " << MatcherIndex<<'\n');
+ LLVM_DEBUG(dbgs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString()
+ << "] from " << SwitchStart << " to " << MatcherIndex
+ << '\n');
continue;
}
case OPC_CheckChild0Type: case OPC_CheckChild1Type:
@@ -3658,16 +3632,11 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
Res->setMemRefs(MemRefs, MemRefs + NumMemRefs);
}
- DEBUG(
- if (!MatchedMemRefs.empty() && Res->memoperands_empty())
- dbgs() << " Dropping mem operands\n";
- dbgs() << " "
- << (IsMorphNodeTo ? "Morphed" : "Created")
- << " node: ";
- Res->dump(CurDAG);
-
- dbgs() << '\n';
- );
+ LLVM_DEBUG(if (!MatchedMemRefs.empty() && Res->memoperands_empty()) dbgs()
+ << " Dropping mem operands\n";
+ dbgs() << " " << (IsMorphNodeTo ? "Morphed" : "Created")
+ << " node: ";
+ Res->dump(CurDAG););
// If this was a MorphNodeTo then we're completely done!
if (IsMorphNodeTo) {
@@ -3702,7 +3671,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
NodeToMatch->getValueType(i).getSizeInBits() ==
Res.getValueSizeInBits()) &&
"invalid replacement");
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res);
+ ReplaceUses(SDValue(NodeToMatch, i), Res);
}
// Update chain uses.
@@ -3715,8 +3684,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
if (NodeToMatch->getValueType(NodeToMatch->getNumValues() - 1) ==
MVT::Glue &&
InputGlue.getNode())
- CurDAG->ReplaceAllUsesOfValueWith(
- SDValue(NodeToMatch, NodeToMatch->getNumValues() - 1), InputGlue);
+ ReplaceUses(SDValue(NodeToMatch, NodeToMatch->getNumValues() - 1),
+ InputGlue);
assert(NodeToMatch->use_empty() &&
"Didn't replace all uses of the node?");
@@ -3729,7 +3698,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
// If the code reached this point, then the match failed. See if there is
// another child to try in the current 'Scope', otherwise pop it until we
// find a case to check.
- DEBUG(dbgs() << " Match failed at index " << CurrentOpcodeIndex << "\n");
+ LLVM_DEBUG(dbgs() << " Match failed at index " << CurrentOpcodeIndex
+ << "\n");
++NumDAGIselRetries;
while (true) {
if (MatchScopes.empty()) {
@@ -3749,7 +3719,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
MatchedMemRefs.resize(LastScope.NumMatchedMemRefs);
MatcherIndex = LastScope.FailIndex;
- DEBUG(dbgs() << " Continuing at " << MatcherIndex << "\n");
+ LLVM_DEBUG(dbgs() << " Continuing at " << MatcherIndex << "\n");
InputChain = LastScope.InputChain;
InputGlue = LastScope.InputGlue;
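The node-id bookkeeping added to this file (EnforceNodeIdInvariant, InvalidateNodeId, getUninvalidatedNodeId) rests on one arithmetic trick: an unselected node whose id is x > 0 is stored as -(x + 1), which can never collide with the -1 used for already-selected nodes and can be undone exactly. A small self-contained sketch of that round trip, with invented helper names rather than the SelectionDAGISel methods:

    #include <cassert>

    // Id -1 means "already selected". An unselected node with a positive id x
    // is marked invalid for topological pruning by storing -(x + 1), which is
    // always <= -2 and so cannot be confused with -1.
    static int invalidateId(int Id) {
      assert(Id > 0 && "only positive ids are bit-negated");
      return -(Id + 1);
    }

    // Recover the original id whether or not it was invalidated.
    static int uninvalidatedId(int Id) {
      return Id < -1 ? -(Id + 1) : Id;
    }

    int main() {
      for (int Id = 1; Id <= 4; ++Id) {
        int Inv = invalidateId(Id);
        assert(Inv <= -2);                  // never collides with -1
        assert(uninvalidatedId(Inv) == Id); // the mapping is reversible
      }
      assert(uninvalidatedId(-1) == -1);    // selected nodes are left alone
      return 0;
    }

Because the encoding is exactly reversible, the pruning code can keep comparing ids for topological checks while treating any id below -1 as a node it must not prune through.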
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index be4ab094bf49..3b19bff4743d 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -229,7 +229,7 @@ bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color, DenseSet
if (level >= 20) {
if (!printed) {
printed = true;
- DEBUG(dbgs() << "setSubgraphColor hit max level\n");
+ LLVM_DEBUG(dbgs() << "setSubgraphColor hit max level\n");
}
return true;
}
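Much of the churn in these SelectionDAG files is the mechanical switch from DEBUG(...) to LLVM_DEBUG(...). Both wrap a statement so that it only runs when debug output is compiled in and enabled; the sketch below shows just the compile-time gating, under an invented macro name, whereas the real LLVM macro also checks a runtime flag and the -debug-only category filter:

    #include <iostream>

    // Illustrative stand-in for the idea behind LLVM_DEBUG: in release builds
    // (NDEBUG) the statement is compiled out entirely, in debug builds it runs
    // as written.
    #ifdef NDEBUG
    #define MY_DEBUG(X) do { } while (false)
    #else
    #define MY_DEBUG(X) do { X; } while (false)
    #endif

    int main() {
      MY_DEBUG(std::cerr << "setSubgraphColor hit max level\n");
      return 0;
    }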
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 3f64b49e3555..5cf06e62b80c 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -28,7 +28,6 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -43,6 +42,7 @@
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/MachineValueType.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <cassert>
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index d76e52d78870..fa867fcec366 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -20,7 +20,6 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/TargetLoweringObjectFile.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
@@ -32,6 +31,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include <cctype>
using namespace llvm;
@@ -96,7 +96,7 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
return true;
}
-/// \brief Set CallLoweringInfo attribute flags based on a call instruction
+/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(ImmutableCallSite *CS,
unsigned ArgIdx) {
@@ -524,6 +524,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
}
// Other users may use these bits.
+ EVT VT = Op.getValueType();
if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
if (Depth != 0) {
// If not at the root, Just compute the Known bits to
@@ -537,7 +538,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
} else if (DemandedMask == 0) {
// Not demanding any bits from Op.
if (!Op.isUndef())
- return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType()));
+ return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
return false;
} else if (Depth == 6) { // Limit search depth.
return false;
@@ -580,7 +581,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
KnownBits LHSKnown;
// Do not increment Depth here; that can cause an infinite loop.
TLO.DAG.computeKnownBits(Op0, LHSKnown, Depth);
- // If the LHS already has zeros where RHSC does, this and is dead.
+ // If the LHS already has zeros where RHSC does, this 'and' is dead.
if ((LHSKnown.Zero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
return TLO.CombineTo(Op, Op0);
@@ -596,8 +597,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
LHSKnown.One == ~RHSC->getAPIntValue()) {
- SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, Op.getValueType(),
- Op0.getOperand(0), Op.getOperand(1));
+ SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0),
+ Op.getOperand(1));
return TLO.CombineTo(Op, Xor);
}
}
@@ -618,7 +619,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
return TLO.CombineTo(Op, Op.getOperand(1));
// If all of the demanded bits in the inputs are known zeros, return zero.
if (NewMask.isSubsetOf(Known.Zero | Known2.Zero))
- return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, Op.getValueType()));
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
// If the RHS is a constant, see if we can simplify it.
if (ShrinkDemandedConstant(Op, ~Known2.Zero & NewMask, TLO))
return true;
@@ -680,7 +681,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// (but not both) turn this into an *inclusive* or.
// e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
if ((NewMask & ~Known.Zero & ~Known2.Zero) == 0)
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(),
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT,
Op.getOperand(0),
Op.getOperand(1)));
@@ -696,7 +697,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// NB: it is okay if more bits are known than are requested
if (NewMask.isSubsetOf(Known.Zero|Known.One)) { // all known on one side
if (Known.One == Known2.One) { // set bits are the same on both sides
- EVT VT = Op.getValueType();
SDValue ANDC = TLO.DAG.getConstant(~Known.One & NewMask, dl, VT);
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,
Op.getOperand(0), ANDC));
@@ -710,7 +710,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (C && !C->isAllOnesValue()) {
if (NewMask.isSubsetOf(C->getAPIntValue())) {
// We're flipping all demanded bits. Flip the undemanded bits too.
- SDValue New = TLO.DAG.getNOT(dl, Op.getOperand(0), Op.getValueType());
+ SDValue New = TLO.DAG.getNOT(dl, Op.getOperand(0), VT);
return TLO.CombineTo(Op, New);
}
// If we can't turn this into a 'not', try to shrink the constant.
@@ -761,7 +761,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// width as the setcc result, and (3) the result of a setcc conforms to 0 or
// -1, we may be able to bypass the setcc.
if (NewMask.isSignMask() && Op0.getScalarValueSizeInBits() == BitWidth &&
- getBooleanContents(Op.getValueType()) ==
+ getBooleanContents(VT) ==
BooleanContent::ZeroOrNegativeOneBooleanContent) {
// If we're testing X < 0, then this compare isn't needed - just use X!
// FIXME: We're limiting to integer types here, but this should also work
@@ -807,7 +807,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
SDValue NewSA =
TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
- EVT VT = Op.getValueType();
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
InOp.getOperand(0),
NewSA));
@@ -835,8 +834,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
TLO.DAG.getConstant(ShAmt, dl, ShTy));
return
TLO.CombineTo(Op,
- TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(),
- NarrowShl));
+ TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
}
// Repeat the SHL optimization above in cases where an extension
// intervenes: (shl (anyext (shr x, c1)), c2) to
@@ -854,7 +852,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
SDValue NewSA =
TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
Op.getOperand(1).getValueType());
- EVT VT = Op.getValueType();
SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
InnerOp.getOperand(0));
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT,
@@ -904,7 +901,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
SDValue NewSA =
TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
- EVT VT = Op.getValueType();
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
InOp.getOperand(0),
NewSA));
@@ -930,12 +926,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// the shift amount is >= the size of the datatype, which is undefined.
if (NewMask.isOneValue())
return TLO.CombineTo(Op,
- TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(),
- Op.getOperand(0), Op.getOperand(1)));
+ TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0),
+ Op.getOperand(1)));
if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) {
- EVT VT = Op.getValueType();
-
// If the shift count is an invalid immediate, don't do anything.
if (SA->getAPIntValue().uge(BitWidth))
break;
@@ -1000,14 +994,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (!AlreadySignExtended) {
// Compute the correct shift amount type, which must be getShiftAmountTy
// for scalar types after legalization.
- EVT ShiftAmtTy = Op.getValueType();
+ EVT ShiftAmtTy = VT;
if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);
SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl,
ShiftAmtTy);
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
- Op.getValueType(), InOp,
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, InOp,
ShiftAmt));
}
}
@@ -1072,8 +1065,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If none of the top bits are demanded, convert this into an any_extend.
if (NewMask.getActiveBits() <= OperandBitWidth)
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
- Op.getValueType(),
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
Op.getOperand(0)));
APInt InMask = NewMask.trunc(OperandBitWidth);
@@ -1089,8 +1081,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If none of the top bits are demanded, convert this into an any_extend.
if (NewMask.getActiveBits() <= InBits)
- return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
- Op.getValueType(),
+ return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
Op.getOperand(0)));
// Since some of the sign extended bits are demanded, we know that the sign
@@ -1107,8 +1098,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If the sign bit is known zero, convert this to a zero extend.
if (Known.isNonNegative())
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl,
- Op.getValueType(),
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT,
Op.getOperand(0)));
break;
}
@@ -1139,8 +1129,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
case ISD::SRL:
// Shrink SRL by a constant if none of the high bits shifted in are
// demanded.
- if (TLO.LegalTypes() &&
- !isTypeDesirableForOp(ISD::SRL, Op.getValueType()))
+ if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
// Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
// undesirable.
break;
@@ -1150,8 +1139,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
SDValue Shift = In.getOperand(1);
if (TLO.LegalTypes()) {
uint64_t ShVal = ShAmt->getZExtValue();
- Shift = TLO.DAG.getConstant(ShVal, dl,
- getShiftAmountTy(Op.getValueType(), DL));
+ Shift = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL));
}
if (ShAmt->getZExtValue() < BitWidth) {
@@ -1163,12 +1151,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (!(HighBits & NewMask)) {
// None of the shifted in bits are needed. Add a truncate of the
// shift input, then shift it.
- SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl,
- Op.getValueType(),
+ SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, VT,
In.getOperand(0));
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,
- Op.getValueType(),
- NewTrunc,
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc,
Shift));
}
}
@@ -1182,9 +1167,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
case ISD::AssertZext: {
// AssertZext demands all of the high bits, plus any of the low bits
// demanded by its users.
- EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
- APInt InMask = APInt::getLowBitsSet(BitWidth,
- VT.getSizeInBits());
+ EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | NewMask,
Known, TLO, Depth+1))
return true;
@@ -1196,40 +1180,45 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
case ISD::BITCAST:
// If this is an FP->Int bitcast and if the sign bit is the only
// thing demanded, turn this into a FGETSIGN.
- if (!TLO.LegalOperations() &&
- !Op.getValueType().isVector() &&
+ if (!TLO.LegalOperations() && !VT.isVector() &&
!Op.getOperand(0).getValueType().isVector() &&
NewMask == APInt::getSignMask(Op.getValueSizeInBits()) &&
Op.getOperand(0).getValueType().isFloatingPoint()) {
- bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType());
+ bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
- if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple() &&
+ if ((OpVTLegal || i32Legal) && VT.isSimple() &&
+ Op.getOperand(0).getValueType() != MVT::f16 &&
Op.getOperand(0).getValueType() != MVT::f128) {
// Cannot eliminate/lower SHL for f128 yet.
- EVT Ty = OpVTLegal ? Op.getValueType() : MVT::i32;
+ EVT Ty = OpVTLegal ? VT : MVT::i32;
// Make a FGETSIGN + SHL to move the sign bit into the appropriate
// place. We expect the SHL to be eliminated by other optimizations.
SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Op.getOperand(0));
unsigned OpVTSizeInBits = Op.getValueSizeInBits();
if (!OpVTLegal && OpVTSizeInBits > 32)
- Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Sign);
+ Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
unsigned ShVal = Op.getValueSizeInBits() - 1;
- SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, Op.getValueType());
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
- Op.getValueType(),
- Sign, ShAmt));
+ SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
}
}
+ // If this is a bitcast, let computeKnownBits handle it. Only do this on a
+ // recursive call where Known may be useful to the caller.
+ if (Depth > 0) {
+ TLO.DAG.computeKnownBits(Op, Known, Depth);
+ return false;
+ }
break;
case ISD::ADD:
case ISD::MUL:
case ISD::SUB: {
// Add, Sub, and Mul don't demand any bits in positions beyond that
// of the highest bit demanded of them.
- APInt LoMask = APInt::getLowBitsSet(BitWidth,
- BitWidth - NewMask.countLeadingZeros());
- if (SimplifyDemandedBits(Op.getOperand(0), LoMask, Known2, TLO, Depth+1) ||
- SimplifyDemandedBits(Op.getOperand(1), LoMask, Known2, TLO, Depth+1) ||
+ SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
+ unsigned NewMaskLZ = NewMask.countLeadingZeros();
+ APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - NewMaskLZ);
+ if (SimplifyDemandedBits(Op0, LoMask, Known2, TLO, Depth + 1) ||
+ SimplifyDemandedBits(Op1, LoMask, Known2, TLO, Depth + 1) ||
// See if the operation should be performed at a smaller bit width.
ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) {
SDNodeFlags Flags = Op.getNode()->getFlags();
@@ -1238,13 +1227,33 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// won't wrap after simplification.
Flags.setNoSignedWrap(false);
Flags.setNoUnsignedWrap(false);
- SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, Op.getValueType(),
- Op.getOperand(0), Op.getOperand(1),
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1,
Flags);
return TLO.CombineTo(Op, NewOp);
}
return true;
}
+
+ // If we have a constant operand, we may be able to turn it into -1 if we
+ // do not demand the high bits. This can make the constant smaller to
+ // encode, allow more general folding, or match specialized instruction
+ // patterns (e.g., 'blsr' on x86). Don't bother changing 1 to -1 because that
+ // is probably not useful (and could be detrimental).
+ ConstantSDNode *C = isConstOrConstSplat(Op1);
+ APInt HighMask = APInt::getHighBitsSet(NewMask.getBitWidth(), NewMaskLZ);
+ if (C && !C->isAllOnesValue() && !C->isOne() &&
+ (C->getAPIntValue() | HighMask).isAllOnesValue()) {
+ SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
+ // We can't guarantee that the new math op doesn't wrap, so explicitly
+ // clear those flags to prevent folding with a potential existing node
+ // that has those flags set.
+ SDNodeFlags Flags;
+ Flags.setNoSignedWrap(false);
+ Flags.setNoUnsignedWrap(false);
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
+ return TLO.CombineTo(Op, NewOp);
+ }
+
LLVM_FALLTHROUGH;
}
default:
@@ -1265,10 +1274,384 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (C->isOpaque())
return false;
}
- return TLO.CombineTo(Op,
- TLO.DAG.getConstant(Known.One, dl, Op.getValueType()));
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
+ }
+
+ return false;
+}
+
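A quick way to convince oneself of the ADD/SUB constant-to-allones fold near the end of SimplifyDemandedBits above is to check the underlying arithmetic identity directly. The following standalone C++ sketch is not part of the patch; the 8-bit width and the mask values are picked only for illustration. It verifies that when the constant already has all ones below the undemanded high region, replacing it with -1 cannot change any demanded bit of the sum:

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t DemandedMask = 0x0F; // only the low nibble is demanded
  const uint8_t HighMask = 0xF0;     // analogue of APInt::getHighBitsSet(8, 4)
  for (unsigned X = 0; X < 256; ++X) {
    for (unsigned C = 0; C < 256; ++C) {
      // The fold only fires when (C | HighMask) is all-ones.
      if (static_cast<uint8_t>(C | HighMask) != 0xFF)
        continue;
      uint8_t WithC = static_cast<uint8_t>(X + C);
      uint8_t WithNeg1 = static_cast<uint8_t>(X + 0xFF); // C replaced by -1
      assert((WithC & DemandedMask) == (WithNeg1 & DemandedMask));
    }
  }
  return 0;
}

Addition modulo 2^k depends only on the low k bits of its operands, which is why the demanded (low) bits of the result are preserved.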
+bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
+ const APInt &DemandedElts,
+ APInt &KnownUndef,
+ APInt &KnownZero,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+ !DCI.isBeforeLegalizeOps());
+
+ bool Simplified =
+ SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
+ if (Simplified)
+ DCI.CommitTargetLoweringOpt(TLO);
+ return Simplified;
+}
+
+bool TargetLowering::SimplifyDemandedVectorElts(
+ SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef,
+ APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
+ bool AssumeSingleUse) const {
+ EVT VT = Op.getValueType();
+ APInt DemandedElts = DemandedEltMask;
+ unsigned NumElts = DemandedElts.getBitWidth();
+ assert(VT.isVector() && "Expected vector op");
+ assert(VT.getVectorNumElements() == NumElts &&
+ "Mask size mismatches value type element count!");
+
+ KnownUndef = KnownZero = APInt::getNullValue(NumElts);
+
+ // Undef operand.
+ if (Op.isUndef()) {
+ KnownUndef.setAllBits();
+ return false;
+ }
+
+ // If Op has other users, assume that all elements are needed.
+ if (!Op.getNode()->hasOneUse() && !AssumeSingleUse)
+ DemandedElts.setAllBits();
+
+ // Not demanding any elements from Op.
+ if (DemandedElts == 0) {
+ KnownUndef.setAllBits();
+ return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
+ }
+
+ // Limit search depth.
+ if (Depth >= 6)
+ return false;
+
+ SDLoc DL(Op);
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+
+ switch (Op.getOpcode()) {
+ case ISD::SCALAR_TO_VECTOR: {
+ if (!DemandedElts[0]) {
+ KnownUndef.setAllBits();
+ return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
+ }
+ KnownUndef.setHighBits(NumElts - 1);
+ break;
+ }
+ case ISD::BITCAST: {
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+
+ // We only handle vectors here.
+ // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
+ if (!SrcVT.isVector())
+ break;
+
+ // Fast handling of 'identity' bitcasts.
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
+ if (NumSrcElts == NumElts)
+ return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
+ KnownZero, TLO, Depth + 1);
+
+ APInt SrcZero, SrcUndef;
+ APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts);
+
+ // Bitcast from a 'large element' src vector to a 'small element' vector:
+ // we must demand a source element if any DemandedElt maps to it.
+ if ((NumElts % NumSrcElts) == 0) {
+ unsigned Scale = NumElts / NumSrcElts;
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i])
+ SrcDemandedElts.setBit(i / Scale);
+
+ if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
+ TLO, Depth + 1))
+ return true;
+
+ // If the src element is zero/undef then all of the output elements that it
+ // covers will be too - only demanded elements are guaranteed to be correct.
+ for (unsigned i = 0; i != NumSrcElts; ++i) {
+ if (SrcDemandedElts[i]) {
+ if (SrcZero[i])
+ KnownZero.setBits(i * Scale, (i + 1) * Scale);
+ if (SrcUndef[i])
+ KnownUndef.setBits(i * Scale, (i + 1) * Scale);
+ }
+ }
+ }
+
+ // Bitcast from a 'small element' src vector to a 'large element' vector:
+ // we demand all of the smaller source elements covered by the larger
+ // demanded element of this vector.
+ if ((NumSrcElts % NumElts) == 0) {
+ unsigned Scale = NumSrcElts / NumElts;
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i])
+ SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale);
+
+ if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
+ TLO, Depth + 1))
+ return true;
+
+ // If all the src elements covering an output element are zero/undef, then
+ // the output element will be as well, assuming it was demanded.
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (DemandedElts[i]) {
+ if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue())
+ KnownZero.setBit(i);
+ if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue())
+ KnownUndef.setBit(i);
+ }
+ }
+ }
+ break;
+ }
+ case ISD::BUILD_VECTOR: {
+ // Check all elements and simplify any unused elements with UNDEF.
+ if (!DemandedElts.isAllOnesValue()) {
+ // Don't simplify BROADCASTS.
+ if (llvm::any_of(Op->op_values(),
+ [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
+ SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
+ bool Updated = false;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (!DemandedElts[i] && !Ops[i].isUndef()) {
+ Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
+ KnownUndef.setBit(i);
+ Updated = true;
+ }
+ }
+ if (Updated)
+ return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
+ }
+ }
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue SrcOp = Op.getOperand(i);
+ if (SrcOp.isUndef()) {
+ KnownUndef.setBit(i);
+ } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
+ (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
+ KnownZero.setBit(i);
+ }
+ }
+ break;
+ }
+ case ISD::CONCAT_VECTORS: {
+ EVT SubVT = Op.getOperand(0).getValueType();
+ unsigned NumSubVecs = Op.getNumOperands();
+ unsigned NumSubElts = SubVT.getVectorNumElements();
+ for (unsigned i = 0; i != NumSubVecs; ++i) {
+ SDValue SubOp = Op.getOperand(i);
+ APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
+ APInt SubUndef, SubZero;
+ if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
+ Depth + 1))
+ return true;
+ KnownUndef.insertBits(SubUndef, i * NumSubElts);
+ KnownZero.insertBits(SubZero, i * NumSubElts);
+ }
+ break;
+ }
+ case ISD::INSERT_SUBVECTOR: {
+ if (!isa<ConstantSDNode>(Op.getOperand(2)))
+ break;
+ SDValue Base = Op.getOperand(0);
+ SDValue Sub = Op.getOperand(1);
+ EVT SubVT = Sub.getValueType();
+ unsigned NumSubElts = SubVT.getVectorNumElements();
+ const APInt& Idx = cast<ConstantSDNode>(Op.getOperand(2))->getAPIntValue();
+ if (Idx.uge(NumElts - NumSubElts))
+ break;
+ unsigned SubIdx = Idx.getZExtValue();
+ APInt SubElts = DemandedElts.extractBits(NumSubElts, SubIdx);
+ APInt SubUndef, SubZero;
+ if (SimplifyDemandedVectorElts(Sub, SubElts, SubUndef, SubZero, TLO,
+ Depth + 1))
+ return true;
+ APInt BaseElts = DemandedElts;
+ BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx);
+ if (SimplifyDemandedVectorElts(Base, BaseElts, KnownUndef, KnownZero, TLO,
+ Depth + 1))
+ return true;
+ KnownUndef.insertBits(SubUndef, SubIdx);
+ KnownZero.insertBits(SubZero, SubIdx);
+ break;
+ }
+ case ISD::EXTRACT_SUBVECTOR: {
+ if (!isa<ConstantSDNode>(Op.getOperand(1)))
+ break;
+ SDValue Src = Op.getOperand(0);
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ const APInt& Idx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
+ if (Idx.uge(NumSrcElts - NumElts))
+ break;
+ // Offset the demanded elts by the subvector index.
+ uint64_t SubIdx = Idx.getZExtValue();
+ APInt SrcElts = DemandedElts.zext(NumSrcElts).shl(SubIdx);
+ APInt SrcUndef, SrcZero;
+ if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
+ Depth + 1))
+ return true;
+ KnownUndef = SrcUndef.extractBits(NumElts, SubIdx);
+ KnownZero = SrcZero.extractBits(NumElts, SubIdx);
+ break;
+ }
+ case ISD::INSERT_VECTOR_ELT: {
+ SDValue Vec = Op.getOperand(0);
+ SDValue Scl = Op.getOperand(1);
+ auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+
+ // For a legal, constant insertion index, if we don't need this insertion
+ // then strip it, else remove it from the demanded elts.
+ if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
+ unsigned Idx = CIdx->getZExtValue();
+ if (!DemandedElts[Idx])
+ return TLO.CombineTo(Op, Vec);
+ DemandedElts.clearBit(Idx);
+
+ if (SimplifyDemandedVectorElts(Vec, DemandedElts, KnownUndef,
+ KnownZero, TLO, Depth + 1))
+ return true;
+
+ KnownUndef.clearBit(Idx);
+ if (Scl.isUndef())
+ KnownUndef.setBit(Idx);
+
+ KnownZero.clearBit(Idx);
+ if (isNullConstant(Scl) || isNullFPConstant(Scl))
+ KnownZero.setBit(Idx);
+ break;
+ }
+
+ APInt VecUndef, VecZero;
+ if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
+ Depth + 1))
+ return true;
+ // Without knowing the insertion index we can't set KnownUndef/KnownZero.
+ break;
+ }
+ case ISD::VSELECT: {
+ APInt DemandedLHS(DemandedElts);
+ APInt DemandedRHS(DemandedElts);
+
+ // TODO - add support for constant vselect masks.
+
+ // See if we can simplify either vselect operand.
+ APInt UndefLHS, ZeroLHS;
+ APInt UndefRHS, ZeroRHS;
+ if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
+ ZeroLHS, TLO, Depth + 1))
+ return true;
+ if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS,
+ ZeroRHS, TLO, Depth + 1))
+ return true;
+
+ KnownUndef = UndefLHS & UndefRHS;
+ KnownZero = ZeroLHS & ZeroRHS;
+ break;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
+
+ // Collect demanded elements from shuffle operands.
+ APInt DemandedLHS(NumElts, 0);
+ APInt DemandedRHS(NumElts, 0);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int M = ShuffleMask[i];
+ if (M < 0 || !DemandedElts[i])
+ continue;
+ assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
+ if (M < (int)NumElts)
+ DemandedLHS.setBit(M);
+ else
+ DemandedRHS.setBit(M - NumElts);
+ }
+
+ // See if we can simplify either shuffle operand.
+ APInt UndefLHS, ZeroLHS;
+ APInt UndefRHS, ZeroRHS;
+ if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS,
+ ZeroLHS, TLO, Depth + 1))
+ return true;
+ if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS,
+ ZeroRHS, TLO, Depth + 1))
+ return true;
+
+ // Simplify mask using undef elements from LHS/RHS.
+ bool Updated = false;
+ bool IdentityLHS = true, IdentityRHS = true;
+ SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end());
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int &M = NewMask[i];
+ if (M < 0)
+ continue;
+ if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
+ (M >= (int)NumElts && UndefRHS[M - NumElts])) {
+ Updated = true;
+ M = -1;
+ }
+ IdentityLHS &= (M < 0) || (M == (int)i);
+ IdentityRHS &= (M < 0) || ((M - NumElts) == i);
+ }
+
+ // Update legal shuffle masks based on demanded elements if it won't reduce
+ // to Identity, which can cause premature removal of the shuffle mask.
+ if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps &&
+ isShuffleMaskLegal(NewMask, VT))
+ return TLO.CombineTo(Op,
+ TLO.DAG.getVectorShuffle(VT, DL, Op.getOperand(0),
+ Op.getOperand(1), NewMask));
+
+ // Propagate undef/zero elements from LHS/RHS.
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int M = ShuffleMask[i];
+ if (M < 0) {
+ KnownUndef.setBit(i);
+ } else if (M < (int)NumElts) {
+ if (UndefLHS[M])
+ KnownUndef.setBit(i);
+ if (ZeroLHS[M])
+ KnownZero.setBit(i);
+ } else {
+ if (UndefRHS[M - NumElts])
+ KnownUndef.setBit(i);
+ if (ZeroRHS[M - NumElts])
+ KnownZero.setBit(i);
+ }
+ }
+ break;
+ }
+ case ISD::ADD:
+ case ISD::SUB: {
+ APInt SrcUndef, SrcZero;
+ if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
+ SrcZero, TLO, Depth + 1))
+ return true;
+ if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
+ KnownZero, TLO, Depth + 1))
+ return true;
+ KnownZero &= SrcZero;
+ KnownUndef &= SrcUndef;
+ break;
+ }
+ case ISD::TRUNCATE:
+ if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
+ KnownZero, TLO, Depth + 1))
+ return true;
+ break;
+ default: {
+ if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
+ if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
+ KnownZero, TLO, Depth))
+ return true;
+ break;
+ }
}
+ assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
return false;
}
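The two BITCAST scaling cases in the new SimplifyDemandedVectorElts above just rescale lane indices between the source and destination element counts. Below is a minimal standalone sketch of the same mapping, using plain C++ bitmasks in a uint64_t rather than APInt; srcDemanded is a made-up helper name used only for illustration:

#include <cassert>
#include <cstdint>

// Map demanded lanes of the bitcast result (NumElts lanes) onto the source
// vector (NumSrcElts lanes), mirroring the two scaling cases above.
static uint64_t srcDemanded(uint64_t DemandedElts, unsigned NumElts,
                            unsigned NumSrcElts) {
  uint64_t Src = 0;
  if (NumElts % NumSrcElts == 0) { // source elements are wider
    unsigned Scale = NumElts / NumSrcElts;
    for (unsigned i = 0; i != NumElts; ++i)
      if (DemandedElts & (1ULL << i))
        Src |= 1ULL << (i / Scale);
  } else if (NumSrcElts % NumElts == 0) { // source elements are narrower
    unsigned Scale = NumSrcElts / NumElts;
    for (unsigned i = 0; i != NumElts; ++i)
      if (DemandedElts & (1ULL << i))
        for (unsigned j = 0; j != Scale; ++j)
          Src |= 1ULL << (i * Scale + j);
  }
  return Src;
}

int main() {
  // v4i32 result of a bitcast from v2i64: result lane 3 lives in source lane 1.
  assert(srcDemanded(0b1000, /*NumElts=*/4, /*NumSrcElts=*/2) == 0b10);
  // v4i32 result of a bitcast from v8i16: result lane 1 covers source lanes 2-3.
  assert(srcDemanded(0b0010, /*NumElts=*/4, /*NumSrcElts=*/8) == 0b1100);
  return 0;
}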
@@ -1316,6 +1699,18 @@ unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
return 1;
}
+bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
+ SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
+ TargetLoweringOpt &TLO, unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use SimplifyDemandedVectorElts if you don't know whether Op"
+ " is a target node!");
+ return false;
+}
+
// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
// work with truncating build vectors and vectors with elements of less than
// 8 bits.
@@ -1353,16 +1748,6 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const {
llvm_unreachable("Invalid boolean contents");
}
-SDValue TargetLowering::getConstTrueVal(SelectionDAG &DAG, EVT VT,
- const SDLoc &DL) const {
- unsigned ElementWidth = VT.getScalarSizeInBits();
- APInt TrueInt =
- getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent
- ? APInt(ElementWidth, 1)
- : APInt::getAllOnesValue(ElementWidth);
- return DAG.getConstant(TrueInt, DL, VT);
-}
-
bool TargetLowering::isConstFalseVal(const SDNode *N) const {
if (!N)
return false;
@@ -1466,6 +1851,89 @@ SDValue TargetLowering::simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
return SDValue();
}
+/// There are multiple IR patterns that could be checking whether a certain
+/// truncation of a signed number would be lossy. The pattern that is best at
+/// the IR level may not lower optimally, so we want to unfold it.
+/// We are looking for the following pattern (KeptBits is a constant):
+/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
+/// KeptBits won't be bitwidth(x); that case would have been constant-folded
+/// to true/false. KeptBits also can't be 1; that would have been folded to
+/// %x dstcond 0.
+/// We will unfold it into the natural trunc+sext pattern:
+/// ((%x << C) a>> C) dstcond %x
+/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
+SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
+ EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
+ const SDLoc &DL) const {
+ // We must be comparing with a constant.
+ ConstantSDNode *C1;
+ if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
+ return SDValue();
+
+ // N0 should be: add %x, (1 << (KeptBits-1))
+ if (N0->getOpcode() != ISD::ADD)
+ return SDValue();
+
+ // And we must be 'add'ing a constant.
+ ConstantSDNode *C01;
+ if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
+ return SDValue();
+
+ SDValue X = N0->getOperand(0);
+ EVT XVT = X.getValueType();
+
+ // Validate constants ...
+
+ APInt I1 = C1->getAPIntValue();
+
+ ISD::CondCode NewCond;
+ if (Cond == ISD::CondCode::SETULT) {
+ NewCond = ISD::CondCode::SETEQ;
+ } else if (Cond == ISD::CondCode::SETULE) {
+ NewCond = ISD::CondCode::SETEQ;
+ // But need to 'canonicalize' the constant.
+ I1 += 1;
+ } else if (Cond == ISD::CondCode::SETUGT) {
+ NewCond = ISD::CondCode::SETNE;
+ // But need to 'canonicalize' the constant.
+ I1 += 1;
+ } else if (Cond == ISD::CondCode::SETUGE) {
+ NewCond = ISD::CondCode::SETNE;
+ } else
+ return SDValue();
+
+ const APInt &I01 = C01->getAPIntValue();
+ // Both of them must be powers of two, and the setcc constant must be bigger.
+ if (!(I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2()))
+ return SDValue();
+
+ // They are power-of-two, so which bit is set?
+ const unsigned KeptBits = I1.logBase2();
+ const unsigned KeptBitsMinusOne = I01.logBase2();
+
+ // Magic!
+ if (KeptBits != (KeptBitsMinusOne + 1))
+ return SDValue();
+ assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
+
+ // We don't want to do this in every single case.
+ SelectionDAG &DAG = DCI.DAG;
+ if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
+ XVT, KeptBits))
+ return SDValue();
+
+ const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
+ assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");
+
+ // Unfold into: ((%x << C) a>> C) cond %x
+ // Where 'cond' will be either 'eq' or 'ne'.
+ SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
+ SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
+ SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
+ SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
+
+ return T2;
+}
+
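The doc comment on optimizeSetCCOfSignedTruncationCheck above compresses a fair amount of arithmetic. The following standalone C++ sketch, which is not part of the patch and assumes an i8 value with KeptBits = 4 and the SETULT/SETEQ pair of the handled condition codes, exhaustively checks the claimed equivalence between the 'add + unsigned compare' form and the unfolded shl/ashr form:

#include <cassert>
#include <cstdint>

int main() {
  const unsigned KeptBits = 4;              // checking an i8 -> i4 truncation
  const unsigned MaskedBits = 8 - KeptBits; // the C from the comment above
  for (int X = -128; X <= 127; ++X) {
    uint8_t UX = static_cast<uint8_t>(X);
    // Original pattern: (add %x, (1 << (KeptBits-1))) u< (1 << KeptBits)
    bool Original =
        static_cast<uint8_t>(UX + (1u << (KeptBits - 1))) < (1u << KeptBits);
    // Unfolded pattern: ((%x << C) a>> C) == %x, i.e. sign-extend the kept
    // bits and compare with the original value. C++20 guarantees the
    // arithmetic right shift and modular narrowing used here.
    int8_t Kept = static_cast<int8_t>(static_cast<uint8_t>(UX << MaskedBits));
    int8_t SExt = static_cast<int8_t>(Kept >> MaskedBits);
    bool Unfolded = SExt == X;
    assert(Original == Unfolded);
  }
  return 0;
}

Both sides are true exactly when the value fits in KeptBits signed bits (here -8..7), which is what "truncation would not be lossy" means.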
/// Try to simplify a setcc built with the specified operands and cc. If it is
/// unable to simplify it, return a null SDValue.
SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
@@ -1473,25 +1941,21 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
DAGCombinerInfo &DCI,
const SDLoc &dl) const {
SelectionDAG &DAG = DCI.DAG;
+ EVT OpVT = N0.getValueType();
// These setcc operations always fold.
switch (Cond) {
default: break;
case ISD::SETFALSE:
- case ISD::SETFALSE2: return DAG.getConstant(0, dl, VT);
+ case ISD::SETFALSE2: return DAG.getBoolConstant(false, dl, VT, OpVT);
case ISD::SETTRUE:
- case ISD::SETTRUE2: {
- TargetLowering::BooleanContent Cnt =
- getBooleanContents(N0->getValueType(0));
- return DAG.getConstant(
- Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, dl,
- VT);
- }
+ case ISD::SETTRUE2: return DAG.getBoolConstant(true, dl, VT, OpVT);
}
// Ensure that the constant occurs on the RHS and fold constant comparisons.
+ // TODO: Handle non-splat vector constants. All undef causes trouble.
ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
- if (isa<ConstantSDNode>(N0.getNode()) &&
+ if (isConstOrConstSplat(N0) &&
(DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
@@ -1737,7 +2201,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
EVT newVT = N0.getOperand(0).getValueType();
if (DCI.isBeforeLegalizeOps() ||
(isOperationLegal(ISD::SETCC, newVT) &&
- getCondCodeAction(Cond, newVT.getSimpleVT()) == Legal)) {
+ isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
EVT NewSetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), newVT);
SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
@@ -1867,8 +2331,18 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
+ if (SDValue V =
+ optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
+ return V;
+ }
+
+ // These simplifications apply to splat vectors as well.
+ // TODO: Handle more splat vector cases.
+ if (auto *N1C = isConstOrConstSplat(N1)) {
+ const APInt &C1 = N1C->getAPIntValue();
+
APInt MinVal, MaxVal;
- unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits();
+ unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
if (ISD::isSignedIntSetCC(Cond)) {
MinVal = APInt::getSignedMinValue(OperandBitSize);
MaxVal = APInt::getSignedMaxValue(OperandBitSize);
@@ -1881,84 +2355,105 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
// X >= MIN --> true
if (C1 == MinVal)
- return DAG.getConstant(1, dl, VT);
-
- // X >= C0 --> X > (C0 - 1)
- APInt C = C1 - 1;
- ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
- if ((DCI.isBeforeLegalizeOps() ||
- isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
- (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 &&
- isLegalICmpImmediate(C.getSExtValue())))) {
- return DAG.getSetCC(dl, VT, N0,
- DAG.getConstant(C, dl, N1.getValueType()),
- NewCC);
+ return DAG.getBoolConstant(true, dl, VT, OpVT);
+
+ if (!VT.isVector()) { // TODO: Support this for vectors.
+ // X >= C0 --> X > (C0 - 1)
+ APInt C = C1 - 1;
+ ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
+ if ((DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
+ (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
+ isLegalICmpImmediate(C.getSExtValue())))) {
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(C, dl, N1.getValueType()),
+ NewCC);
+ }
}
}
if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
// X <= MAX --> true
if (C1 == MaxVal)
- return DAG.getConstant(1, dl, VT);
+ return DAG.getBoolConstant(true, dl, VT, OpVT);
// X <= C0 --> X < (C0 + 1)
- APInt C = C1 + 1;
- ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
- if ((DCI.isBeforeLegalizeOps() ||
- isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
- (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 &&
- isLegalICmpImmediate(C.getSExtValue())))) {
- return DAG.getSetCC(dl, VT, N0,
- DAG.getConstant(C, dl, N1.getValueType()),
- NewCC);
- }
- }
-
- if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal)
- return DAG.getConstant(0, dl, VT); // X < MIN --> false
- if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal)
- return DAG.getConstant(1, dl, VT); // X >= MIN --> true
- if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal)
- return DAG.getConstant(0, dl, VT); // X > MAX --> false
- if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal)
- return DAG.getConstant(1, dl, VT); // X <= MAX --> true
-
- // Canonicalize setgt X, Min --> setne X, Min
- if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal)
- return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
- // Canonicalize setlt X, Max --> setne X, Max
- if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal)
- return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
-
- // If we have setult X, 1, turn it into seteq X, 0
- if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1)
- return DAG.getSetCC(dl, VT, N0,
- DAG.getConstant(MinVal, dl, N0.getValueType()),
- ISD::SETEQ);
- // If we have setugt X, Max-1, turn it into seteq X, Max
- if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
- return DAG.getSetCC(dl, VT, N0,
- DAG.getConstant(MaxVal, dl, N0.getValueType()),
- ISD::SETEQ);
+ if (!VT.isVector()) { // TODO: Support this for vectors.
+ APInt C = C1 + 1;
+ ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
+ if ((DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
+ (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
+ isLegalICmpImmediate(C.getSExtValue())))) {
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(C, dl, N1.getValueType()),
+ NewCC);
+ }
+ }
+ }
- // If we have "setcc X, C0", check to see if we can shrink the immediate
- // by changing cc.
+ if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
+ if (C1 == MinVal)
+ return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
+
+ // TODO: Support this for vectors after legalize ops.
+ if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
+ // Canonicalize setlt X, Max --> setne X, Max
+ if (C1 == MaxVal)
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
+
+ // If we have setult X, 1, turn it into seteq X, 0
+ if (C1 == MinVal+1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(MinVal, dl, N0.getValueType()),
+ ISD::SETEQ);
+ }
+ }
- // SETUGT X, SINTMAX -> SETLT X, 0
- if (Cond == ISD::SETUGT &&
- C1 == APInt::getSignedMaxValue(OperandBitSize))
- return DAG.getSetCC(dl, VT, N0,
- DAG.getConstant(0, dl, N1.getValueType()),
- ISD::SETLT);
+ if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
+ if (C1 == MaxVal)
+ return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
+
+ // TODO: Support this for vectors after legalize ops.
+ if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
+ // Canonicalize setgt X, Min --> setne X, Min
+ if (C1 == MinVal)
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
+
+ // If we have setugt X, Max-1, turn it into seteq X, Max
+ if (C1 == MaxVal-1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(MaxVal, dl, N0.getValueType()),
+ ISD::SETEQ);
+ }
+ }
- // SETULT X, SINTMIN -> SETGT X, -1
- if (Cond == ISD::SETULT &&
- C1 == APInt::getSignedMinValue(OperandBitSize)) {
- SDValue ConstMinusOne =
- DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl,
- N1.getValueType());
- return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
+ // If we have "setcc X, C0", check to see if we can shrink the immediate
+ // by changing cc.
+ // TODO: Support this for vectors after legalize ops.
+ if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
+ // SETUGT X, SINTMAX -> SETLT X, 0
+ if (Cond == ISD::SETUGT &&
+ C1 == APInt::getSignedMaxValue(OperandBitSize))
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(0, dl, N1.getValueType()),
+ ISD::SETLT);
+
+ // SETULT X, SINTMIN -> SETGT X, -1
+ if (Cond == ISD::SETULT &&
+ C1 == APInt::getSignedMinValue(OperandBitSize)) {
+ SDValue ConstMinusOne =
+ DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl,
+ N1.getValueType());
+ return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
+ }
}
+ }
+
+ // Back to non-vector simplifications.
+ // TODO: Can we do these for vector splats?
+ if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+ const APInt &C1 = N1C->getAPIntValue();
// Fold bit comparisons when we can.
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
@@ -1967,9 +2462,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
N0.getOpcode() == ISD::AND) {
auto &DL = DAG.getDataLayout();
if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
- EVT ShiftTy = DCI.isBeforeLegalize()
- ? getPointerTy(DL)
- : getShiftAmountTy(N0.getValueType(), DL);
+ EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
+ !DCI.isBeforeLegalize());
if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
// Perform the xform if the AND RHS is a single bit.
if (AndRHS->getAPIntValue().isPowerOf2()) {
@@ -2001,9 +2495,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
unsigned ShiftBits = AndRHSC.countTrailingZeros();
auto &DL = DAG.getDataLayout();
- EVT ShiftTy = DCI.isBeforeLegalize()
- ? getPointerTy(DL)
- : getShiftAmountTy(N0.getValueType(), DL);
+ EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
+ !DCI.isBeforeLegalize());
EVT CmpTy = N0.getValueType();
SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0),
DAG.getConstant(ShiftBits, dl,
@@ -2033,9 +2526,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
isLegalICmpImmediate(NewC.getSExtValue())) {
auto &DL = DAG.getDataLayout();
- EVT ShiftTy = DCI.isBeforeLegalize()
- ? getPointerTy(DL)
- : getShiftAmountTy(N0.getValueType(), DL);
+ EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
+ !DCI.isBeforeLegalize());
EVT CmpTy = N0.getValueType();
SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0,
DAG.getConstant(ShiftBits, dl, ShiftTy));
@@ -2058,9 +2550,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
switch (ISD::getUnorderedFlavor(Cond)) {
default: llvm_unreachable("Unknown flavor!");
case 0: // Known false.
- return DAG.getConstant(0, dl, VT);
+ return DAG.getBoolConstant(false, dl, VT, OpVT);
case 1: // Known true.
- return DAG.getConstant(1, dl, VT);
+ return DAG.getBoolConstant(true, dl, VT, OpVT);
case 2: // Undefined.
return DAG.getUNDEF(VT);
}
@@ -2124,31 +2616,24 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (N0 == N1) {
// The sext(setcc()) => setcc() optimization relies on the appropriate
// constant being emitted.
- uint64_t EqVal = 0;
- switch (getBooleanContents(N0.getValueType())) {
- case UndefinedBooleanContent:
- case ZeroOrOneBooleanContent:
- EqVal = ISD::isTrueWhenEqual(Cond);
- break;
- case ZeroOrNegativeOneBooleanContent:
- EqVal = ISD::isTrueWhenEqual(Cond) ? -1 : 0;
- break;
- }
+
+ bool EqTrue = ISD::isTrueWhenEqual(Cond);
// We can always fold X == X for integer setcc's.
- if (N0.getValueType().isInteger()) {
- return DAG.getConstant(EqVal, dl, VT);
- }
+ if (N0.getValueType().isInteger())
+ return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
+
unsigned UOF = ISD::getUnorderedFlavor(Cond);
if (UOF == 2) // FP operators that are undefined on NaNs.
- return DAG.getConstant(EqVal, dl, VT);
- if (UOF == unsigned(ISD::isTrueWhenEqual(Cond)))
- return DAG.getConstant(EqVal, dl, VT);
+ return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
+ if (UOF == unsigned(EqTrue))
+ return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
// Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
// if it is not already.
ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
- if (NewCond != Cond && (DCI.isBeforeLegalizeOps() ||
- getCondCodeAction(NewCond, N0.getSimpleValueType()) == Legal))
+ if (NewCond != Cond &&
+ (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(NewCond, N0.getSimpleValueType())))
return DAG.getSetCC(dl, VT, N0, N1, NewCond);
}
@@ -2237,7 +2722,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
SDValue SH = DAG.getNode(
ISD::SHL, dl, N1.getValueType(), N1,
DAG.getConstant(1, dl,
- getShiftAmountTy(N1.getValueType(), DL)));
+ getShiftAmountTy(N1.getValueType(), DL,
+ !DCI.isBeforeLegalize())));
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(SH.getNode());
return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
@@ -2262,7 +2748,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// X == (Z-X) --> X<<1 == Z
SDValue SH = DAG.getNode(
ISD::SHL, dl, N1.getValueType(), N0,
- DAG.getConstant(1, dl, getShiftAmountTy(N0.getValueType(), DL)));
+ DAG.getConstant(1, dl, getShiftAmountTy(N0.getValueType(), DL,
+ !DCI.isBeforeLegalize())));
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(SH.getNode());
return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond);
@@ -2276,50 +2763,52 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// Fold away ALL boolean setcc's.
SDValue Temp;
- if (N0.getValueType() == MVT::i1 && foldBooleans) {
+ if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
+ EVT OpVT = N0.getValueType();
switch (Cond) {
default: llvm_unreachable("Unknown integer setcc!");
case ISD::SETEQ: // X == Y -> ~(X^Y)
- Temp = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1);
- N0 = DAG.getNOT(dl, Temp, MVT::i1);
+ Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
+ N0 = DAG.getNOT(dl, Temp, OpVT);
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(Temp.getNode());
break;
case ISD::SETNE: // X != Y --> (X^Y)
- N0 = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1);
+ N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
break;
case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
- Temp = DAG.getNOT(dl, N0, MVT::i1);
- N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N1, Temp);
+ Temp = DAG.getNOT(dl, N0, OpVT);
+ N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(Temp.getNode());
break;
case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
- Temp = DAG.getNOT(dl, N1, MVT::i1);
- N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N0, Temp);
+ Temp = DAG.getNOT(dl, N1, OpVT);
+ N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(Temp.getNode());
break;
case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
- Temp = DAG.getNOT(dl, N0, MVT::i1);
- N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N1, Temp);
+ Temp = DAG.getNOT(dl, N0, OpVT);
+ N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(Temp.getNode());
break;
case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
- Temp = DAG.getNOT(dl, N1, MVT::i1);
- N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N0, Temp);
+ Temp = DAG.getNOT(dl, N1, OpVT);
+ N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
break;
}
- if (VT != MVT::i1) {
+ if (VT.getScalarType() != MVT::i1) {
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(N0.getNode());
// FIXME: If running after legalize, we probably can't do this.
- N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0);
+ ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
+ N0 = DAG.getNode(ExtendCode, dl, VT, N0);
}
return N0;
}
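The i1 setcc folds in the hunk above can be sanity-checked by exhausting the four input combinations. This is a small standalone sketch, not DAG code: plain C++ bools stand in for i1 values, and the made-up asSigned helper models the fact that the 1-bit value 1 reads back as -1 when interpreted as signed:

#include <cassert>

// Interpret an i1 bit as its signed value: 0 -> 0, 1 -> -1.
static int asSigned(bool B) { return B ? -1 : 0; }

int main() {
  for (int X = 0; X <= 1; ++X) {
    for (int Y = 0; Y <= 1; ++Y) {
      bool BX = X, BY = Y;
      // X == Y  -->  ~(X ^ Y)
      assert((BX == BY) == !(BX ^ BY));
      // X >s Y and X <u Y  -->  ~X & Y
      assert((asSigned(BX) > asSigned(BY)) == (!BX && BY));
      assert((X < Y) == (!BX && BY));
      // X >=s Y and X <=u Y  -->  ~X | Y
      assert((asSigned(BX) >= asSigned(BY)) == (!BX || BY));
      assert((X <= Y) == (!BX || BY));
    }
  }
  return 0;
}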
@@ -2928,7 +3417,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
}
}
-/// \brief Given an exact SDIV by a constant, create a multiplication
+/// Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d,
const SDLoc &dl, SelectionDAG &DAG,
@@ -2970,7 +3459,7 @@ SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
return SDValue();
}
-/// \brief Given an ISD::SDIV node expressing a divide by constant,
+/// Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
@@ -3034,7 +3523,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,
return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
-/// \brief Given an ISD::UDIV node expressing a divide by constant,
+/// Given an ISD::UDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
@@ -3413,9 +3902,6 @@ SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
return DAG.getMergeValues({ Value, NewChain }, SL);
}
-// FIXME: This relies on each element having a byte size, otherwise the stride
-// is 0 and just overwrites the same location. ExpandStore currently expects
-// this broken behavior.
SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
SelectionDAG &DAG) const {
SDLoc SL(ST);
@@ -3432,11 +3918,43 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
// The type of data as saved in memory.
EVT MemSclVT = StVT.getScalarType();
- // Store Stride in bytes
- unsigned Stride = MemSclVT.getSizeInBits() / 8;
EVT IdxVT = getVectorIdxTy(DAG.getDataLayout());
unsigned NumElem = StVT.getVectorNumElements();
+ // A vector must always be stored in memory as-is, i.e. without any padding
+ // between the elements, since various code depend on it, e.g. in the
+ // handling of a bitcast of a vector type to int, which may be done with a
+ // vector store followed by an integer load. A vector that does not have
+ // elements that are byte-sized must therefore be stored as an integer
+ // built out of the extracted vector elements.
+ if (!MemSclVT.isByteSized()) {
+ unsigned NumBits = StVT.getSizeInBits();
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
+
+ SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
+
+ for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
+ DAG.getConstant(Idx, SL, IdxVT));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
+ SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
+ unsigned ShiftIntoIdx =
+ (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
+ SDValue ShiftAmount =
+ DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
+ SDValue ShiftedElt =
+ DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
+ CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
+ }
+
+ return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
+ ST->getAlignment(), ST->getMemOperand()->getFlags(),
+ ST->getAAInfo());
+ }
+
+ // Store Stride in bytes.
+ unsigned Stride = MemSclVT.getSizeInBits() / 8;
+ assert(Stride && "Zero stride!");
// Extract each of the elements from the original vector and save them into
// memory individually.
SmallVector<SDValue, 8> Stores;
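To make the new non-byte-sized store path in scalarizeVectorStore above more concrete, here is a standalone sketch with a hypothetical packBits helper (not DAG code) that packs a v4i1-style vector into one integer using the same ShiftIntoIdx endian rule as the loop above:

#include <cassert>
#include <cstdint>

// Pack elements that are not byte-sized (1-bit booleans here) into a single
// integer: element Idx is shifted into position ShiftIntoIdx, which counts
// from the low end on little-endian and from the high end on big-endian.
static unsigned packBits(const bool Elts[4], bool IsBigEndian) {
  const unsigned EltBits = 1, NumElem = 4;
  unsigned CurrVal = 0;
  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    unsigned ShiftIntoIdx = IsBigEndian ? (NumElem - 1) - Idx : Idx;
    CurrVal |= static_cast<unsigned>(Elts[Idx]) << (ShiftIntoIdx * EltBits);
  }
  return CurrVal;
}

int main() {
  const bool V[4] = {true, false, true, true}; // a v4i1 value <1,0,1,1>
  assert(packBits(V, /*IsBigEndian=*/false) == 0b1101); // element 0 in bit 0
  assert(packBits(V, /*IsBigEndian=*/true) == 0b1011);  // element 0 in bit 3
  return 0;
}

Storing the packed integer leaves no padding between the elements, which is what bitcast-then-integer-load users of such a store rely on.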
@@ -3475,6 +3993,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
if (!isOperationLegalOrCustom(ISD::LOAD, intVT)) {
// Scalarize the load and let the individual components be handled.
SDValue Scalarized = scalarizeVectorLoad(LD, DAG);
+ if (Scalarized->getOpcode() == ISD::MERGE_VALUES)
+ return std::make_pair(Scalarized.getOperand(0), Scalarized.getOperand(1));
return std::make_pair(Scalarized.getValue(0), Scalarized.getValue(1));
}
diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp
index b35bf6ba3a7b..d3454ca6ba6a 100644
--- a/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp
+++ b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -53,6 +53,7 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -62,11 +63,13 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
@@ -97,7 +100,7 @@ EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden,
namespace {
-/// \brief Class to determine where the safe point to insert the
+/// Class to determine where the safe point to insert the
/// prologue and epilogue are.
/// Unlike the paper from Fred C. Chow, PLDI'88, that introduces the
/// shrink-wrapping term for prologue/epilogue placement, this pass
@@ -128,6 +131,9 @@ class ShrinkWrap : public MachineFunctionPass {
/// are in the same loop.
MachineLoopInfo *MLI;
+ // Emit remarks.
+ MachineOptimizationRemarkEmitter *ORE = nullptr;
+
/// Frequency of the Entry block.
uint64_t EntryFreq;
@@ -137,6 +143,9 @@ class ShrinkWrap : public MachineFunctionPass {
/// Current opcode for frame destroy.
unsigned FrameDestroyOpcode;
+ /// Stack pointer register, used by llvm.{stacksave,stackrestore}
+ unsigned SP;
+
/// Entry block.
const MachineBasicBlock *Entry;
@@ -148,7 +157,7 @@ class ShrinkWrap : public MachineFunctionPass {
/// Current MachineFunction.
MachineFunction *MachineFunc;
- /// \brief Check if \p MI uses or defines a callee-saved register or
+ /// Check if \p MI uses or defines a callee-saved register or
/// a frame index. If this is the case, this means \p MI must happen
/// after Save and before Restore.
bool useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS) const;
@@ -168,14 +177,14 @@ class ShrinkWrap : public MachineFunctionPass {
return CurrentCSRs;
}
- /// \brief Update the Save and Restore points such that \p MBB is in
+ /// Update the Save and Restore points such that \p MBB is in
/// the region that is dominated by Save and post-dominated by Restore
/// and Save and Restore still match the safe point definition.
/// Such point may not exist and Save and/or Restore may be null after
/// this call.
void updateSaveRestorePoints(MachineBasicBlock &MBB, RegScavenger *RS);
- /// \brief Initialize the pass for \p MF.
+ /// Initialize the pass for \p MF.
void init(MachineFunction &MF) {
RCI.runOnMachineFunction(MF);
MDT = &getAnalysis<MachineDominatorTree>();
@@ -184,10 +193,13 @@ class ShrinkWrap : public MachineFunctionPass {
Restore = nullptr;
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
MLI = &getAnalysis<MachineLoopInfo>();
+ ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
EntryFreq = MBFI->getEntryFreq();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetSubtargetInfo &Subtarget = MF.getSubtarget();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
FrameSetupOpcode = TII.getCallFrameSetupOpcode();
FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
+ SP = Subtarget.getTargetLowering()->getStackPointerRegisterToSaveRestore();
Entry = &MF.front();
CurrentCSRs.clear();
MachineFunc = &MF;
@@ -199,7 +211,7 @@ class ShrinkWrap : public MachineFunctionPass {
/// shrink-wrapping.
bool ArePointsInteresting() const { return Save != Entry && Save && Restore; }
- /// \brief Check if shrink wrapping is enabled for this target and function.
+ /// Check if shrink wrapping is enabled for this target and function.
static bool isShrinkWrapEnabled(const MachineFunction &MF);
public:
@@ -215,12 +227,18 @@ public:
AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachinePostDominatorTree>();
AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineOptimizationRemarkEmitterPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
StringRef getPassName() const override { return "Shrink Wrapping analysis"; }
- /// \brief Perform the shrink-wrapping analysis and update
+ /// Perform the shrink-wrapping analysis and update
/// the MachineFrameInfo attached to \p MF with the results.
bool runOnMachineFunction(MachineFunction &MF) override;
};
@@ -236,28 +254,34 @@ INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
INITIALIZE_PASS_END(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false)
bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,
RegScavenger *RS) const {
- // Ignore DBG_VALUE and other meta instructions that must not affect codegen.
- if (MI.isMetaInstruction())
- return false;
-
if (MI.getOpcode() == FrameSetupOpcode ||
MI.getOpcode() == FrameDestroyOpcode) {
- DEBUG(dbgs() << "Frame instruction: " << MI << '\n');
+ LLVM_DEBUG(dbgs() << "Frame instruction: " << MI << '\n');
return true;
}
for (const MachineOperand &MO : MI.operands()) {
bool UseOrDefCSR = false;
if (MO.isReg()) {
+ // Ignore instructions like DBG_VALUE which don't read/def the register.
+ if (!MO.isDef() && !MO.readsReg())
+ continue;
unsigned PhysReg = MO.getReg();
if (!PhysReg)
continue;
assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
"Unallocated register?!");
- UseOrDefCSR = RCI.getLastCalleeSavedAlias(PhysReg);
+ // The stack pointer is not normally described as a callee-saved register
+ // in calling convention definitions, so we need to watch for it
+ // separately. We can ignore an SP mentioned by a call instruction,
+ // though, as it's harmless and we do not want to effectively disable tail
+ // calls by forcing the restore point to post-dominate them.
+ UseOrDefCSR = (!MI.isCall() && PhysReg == SP) ||
+ RCI.getLastCalleeSavedAlias(PhysReg);
} else if (MO.isRegMask()) {
// Check if this regmask clobbers any of the CSRs.
for (unsigned Reg : getCurrentCSRs(RS)) {
@@ -267,16 +291,17 @@ bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,
}
}
}
- if (UseOrDefCSR || MO.isFI()) {
- DEBUG(dbgs() << "Use or define CSR(" << UseOrDefCSR << ") or FI("
- << MO.isFI() << "): " << MI << '\n');
+ // Skip FrameIndex operands in DBG_VALUE instructions.
+ if (UseOrDefCSR || (MO.isFI() && !MI.isDebugValue())) {
+ LLVM_DEBUG(dbgs() << "Use or define CSR(" << UseOrDefCSR << ") or FI("
+ << MO.isFI() << "): " << MI << '\n');
return true;
}
}
return false;
}
-/// \brief Helper function to find the immediate (post) dominator.
+/// Helper function to find the immediate (post) dominator.
template <typename ListOfBBs, typename DominanceAnalysis>
static MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs,
DominanceAnalysis &Dom) {
@@ -300,7 +325,7 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,
Save = MDT->findNearestCommonDominator(Save, &MBB);
if (!Save) {
- DEBUG(dbgs() << "Found a block that is not reachable from Entry\n");
+ LLVM_DEBUG(dbgs() << "Found a block that is not reachable from Entry\n");
return;
}
@@ -334,7 +359,8 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,
}
if (!Restore) {
- DEBUG(dbgs() << "Restore point needs to be spanned on several blocks\n");
+ LLVM_DEBUG(
+ dbgs() << "Restore point needs to be spanned on several blocks\n");
return;
}
@@ -413,38 +439,16 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,
}
}
-/// Check whether the edge (\p SrcBB, \p DestBB) is a backedge according to MLI.
-/// I.e., check if it exists a loop that contains SrcBB and where DestBB is the
-/// loop header.
-static bool isProperBackedge(const MachineLoopInfo &MLI,
- const MachineBasicBlock *SrcBB,
- const MachineBasicBlock *DestBB) {
- for (const MachineLoop *Loop = MLI.getLoopFor(SrcBB); Loop;
- Loop = Loop->getParentLoop()) {
- if (Loop->getHeader() == DestBB)
- return true;
- }
- return false;
-}
+static bool giveUpWithRemarks(MachineOptimizationRemarkEmitter *ORE,
+ StringRef RemarkName, StringRef RemarkMessage,
+ const DiagnosticLocation &Loc,
+ const MachineBasicBlock *MBB) {
+ ORE->emit([&]() {
+ return MachineOptimizationRemarkMissed(DEBUG_TYPE, RemarkName, Loc, MBB)
+ << RemarkMessage;
+ });
-/// Check if the CFG of \p MF is irreducible.
-static bool isIrreducibleCFG(const MachineFunction &MF,
- const MachineLoopInfo &MLI) {
- const MachineBasicBlock *Entry = &*MF.begin();
- ReversePostOrderTraversal<const MachineBasicBlock *> RPOT(Entry);
- BitVector VisitedBB(MF.getNumBlockIDs());
- for (const MachineBasicBlock *MBB : RPOT) {
- VisitedBB.set(MBB->getNumber());
- for (const MachineBasicBlock *SuccBB : MBB->successors()) {
- if (!VisitedBB.test(SuccBB->getNumber()))
- continue;
- // We already visited SuccBB, thus MBB->SuccBB must be a backedge.
- // Check that the head matches what we have in the loop information.
- // Otherwise, we have an irreducible graph.
- if (!isProperBackedge(MLI, MBB, SuccBB))
- return true;
- }
- }
+ LLVM_DEBUG(dbgs() << RemarkMessage << '\n');
return false;
}
@@ -452,19 +456,21 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF))
return false;
- DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
+ LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
init(MF);
- if (isIrreducibleCFG(MF, *MLI)) {
+ ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
+ if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *MLI)) {
// If MF is irreducible, a block may be in a loop without
// MachineLoopInfo reporting it. I.e., we may use the
// post-dominance property in loops, which leads to incorrect
// results. Moreover, we may miss that the prologue and
// epilogue are not in the same loop, leading to unbalanced
// construction/deconstruction of the stack frame.
- DEBUG(dbgs() << "Irreducible CFGs are not supported yet\n");
- return false;
+ return giveUpWithRemarks(ORE, "UnsupportedIrreducibleCFG",
+ "Irreducible CFGs are not supported yet.",
+ MF.getFunction().getSubprogram(), &MF.front());
}
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
@@ -472,12 +478,28 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr);
for (MachineBasicBlock &MBB : MF) {
- DEBUG(dbgs() << "Look into: " << MBB.getNumber() << ' ' << MBB.getName()
- << '\n');
-
- if (MBB.isEHFuncletEntry()) {
- DEBUG(dbgs() << "EH Funclets are not supported yet.\n");
- return false;
+ LLVM_DEBUG(dbgs() << "Look into: " << MBB.getNumber() << ' '
+ << MBB.getName() << '\n');
+
+ if (MBB.isEHFuncletEntry())
+ return giveUpWithRemarks(ORE, "UnsupportedEHFunclets",
+ "EH Funclets are not supported yet.",
+ MBB.front().getDebugLoc(), &MBB);
+
+ if (MBB.isEHPad()) {
+ // Push the prologue and epilogue outside of
+ // the region that may throw by making sure
+ // that all the landing pads are at least at the
+ // boundary of the save and restore points.
+ // The problem with exceptions is that the throw
+ // is not properly modeled and in particular, a
+ // basic block can jump out from the middle.
+ updateSaveRestorePoints(MBB, RS.get());
+ if (!ArePointsInteresting()) {
+ LLVM_DEBUG(dbgs() << "EHPad prevents shrink-wrapping\n");
+ return false;
+ }
+ continue;
}
for (const MachineInstr &MI : MBB) {
@@ -489,7 +511,7 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
// If we are at a point where we cannot improve the placement of
// save/restore instructions, just give up.
if (!ArePointsInteresting()) {
- DEBUG(dbgs() << "No Shrink wrap candidate found\n");
+ LLVM_DEBUG(dbgs() << "No Shrink wrap candidate found\n");
return false;
}
// No need to look for other instructions, this basic block
@@ -502,20 +524,21 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
// because it means we did not encounter any frame/CSR related code.
// Otherwise, we would have returned from the previous loop.
assert(!Save && !Restore && "We miss a shrink-wrap opportunity?!");
- DEBUG(dbgs() << "Nothing to shrink-wrap\n");
+ LLVM_DEBUG(dbgs() << "Nothing to shrink-wrap\n");
return false;
}
- DEBUG(dbgs() << "\n ** Results **\nFrequency of the Entry: " << EntryFreq
- << '\n');
+ LLVM_DEBUG(dbgs() << "\n ** Results **\nFrequency of the Entry: " << EntryFreq
+ << '\n');
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
do {
- DEBUG(dbgs() << "Shrink wrap candidates (#, Name, Freq):\nSave: "
- << Save->getNumber() << ' ' << Save->getName() << ' '
- << MBFI->getBlockFreq(Save).getFrequency() << "\nRestore: "
- << Restore->getNumber() << ' ' << Restore->getName() << ' '
- << MBFI->getBlockFreq(Restore).getFrequency() << '\n');
+ LLVM_DEBUG(dbgs() << "Shrink wrap candidates (#, Name, Freq):\nSave: "
+ << Save->getNumber() << ' ' << Save->getName() << ' '
+ << MBFI->getBlockFreq(Save).getFrequency()
+ << "\nRestore: " << Restore->getNumber() << ' '
+ << Restore->getName() << ' '
+ << MBFI->getBlockFreq(Restore).getFrequency() << '\n');
bool IsSaveCheap, TargetCanUseSaveAsPrologue = false;
if (((IsSaveCheap = EntryFreq >= MBFI->getBlockFreq(Save).getFrequency()) &&
@@ -523,7 +546,8 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
((TargetCanUseSaveAsPrologue = TFI->canUseAsPrologue(*Save)) &&
TFI->canUseAsEpilogue(*Restore)))
break;
- DEBUG(dbgs() << "New points are too expensive or invalid for the target\n");
+ LLVM_DEBUG(
+ dbgs() << "New points are too expensive or invalid for the target\n");
MachineBasicBlock *NewBB;
if (!IsSaveCheap || !TargetCanUseSaveAsPrologue) {
Save = FindIDom<>(*Save, Save->predecessors(), *MDT);
@@ -545,9 +569,10 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
return false;
}
- DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: " << Save->getNumber()
- << ' ' << Save->getName() << "\nRestore: "
- << Restore->getNumber() << ' ' << Restore->getName() << '\n');
+ LLVM_DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: "
+ << Save->getNumber() << ' ' << Save->getName()
+ << "\nRestore: " << Restore->getNumber() << ' '
+ << Restore->getName() << '\n');
MachineFrameInfo &MFI = MF.getFrameInfo();
MFI.setSavePoint(Save);
diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
index 17a3a84ecda5..5d2669f5ae92 100644
--- a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -27,7 +28,6 @@
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
#define DEBUG_TYPE "sjljehprepare"
@@ -64,7 +64,6 @@ public:
private:
bool setupEntryBlockAndCallSites(Function &F);
- bool undoSwiftErrorSelect(Function &F);
void substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, Value *SelVal);
Value *setupFunctionContext(Function &F, ArrayRef<LandingPadInst *> LPads);
void lowerIncomingArguments(Function &F);
@@ -233,6 +232,13 @@ void SjLjEHPrepare::lowerIncomingArguments(Function &F) {
assert(AfterAllocaInsPt != F.front().end());
for (auto &AI : F.args()) {
+ // Swift error really is a register that we model as memory -- instruction
+ // selection will perform mem-to-reg for us and spill/reload appropriately
+ // around calls that clobber it. There is no need to spill this
+ // value to the stack and doing so would not be allowed.
+ if (AI.isSwiftError())
+ continue;
+
Type *Ty = AI.getType();
// Use 'select i8 true, %arg, undef' to simulate a 'no-op' instruction.
@@ -301,8 +307,8 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F,
for (InvokeInst *Invoke : Invokes) {
BasicBlock *UnwindBlock = Invoke->getUnwindDest();
if (UnwindBlock != &BB && LiveBBs.count(UnwindBlock)) {
- DEBUG(dbgs() << "SJLJ Spill: " << Inst << " around "
- << UnwindBlock->getName() << "\n");
+ LLVM_DEBUG(dbgs() << "SJLJ Spill: " << Inst << " around "
+ << UnwindBlock->getName() << "\n");
NeedsSpill = true;
break;
}
@@ -462,25 +468,6 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
return true;
}
-bool SjLjEHPrepare::undoSwiftErrorSelect(Function &F) {
- // We have inserted dummy copies 'select true, arg, undef' in the entry block
- // for arguments to simplify this pass.
- // swifterror arguments cannot be used in this way. Undo the select for the
- // swifterror argument.
- for (auto &AI : F.args()) {
- if (AI.isSwiftError()) {
- assert(AI.hasOneUse() && "Must have converted the argument to a select");
- auto *Select = dyn_cast<SelectInst>(AI.use_begin()->getUser());
- assert(Select && "There must be single select user");
- auto *OrigSwiftError = cast<Argument>(Select->getTrueValue());
- Select->replaceAllUsesWith(OrigSwiftError);
- Select->eraseFromParent();
- return true;
- }
- }
- return false;
-}
-
bool SjLjEHPrepare::runOnFunction(Function &F) {
Module &M = *F.getParent();
RegisterFn = M.getOrInsertFunction(
@@ -499,7 +486,5 @@ bool SjLjEHPrepare::runOnFunction(Function &F) {
FuncCtxFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_functioncontext);
bool Res = setupEntryBlockAndCallSites(F);
- if (Res)
- Res |= undoSwiftErrorSelect(F);
return Res;
}
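
// Editorial sketch (not part of the patch): the SjLjEHPrepare change replaces an
// "insert a dummy select for every argument, then undo it for swifterror" scheme with
// an early skip. The stand-alone code below illustrates that design choice using
// hypothetical types rather than LLVM's IR classes.
#include <iostream>
#include <string>
#include <vector>

struct Arg {
  std::string Name;
  bool IsSwiftError; // swifterror values must stay in a register, never spill
};

// Lower only the arguments that may legally be spilled to the stack.
static void lowerIncomingArguments(const std::vector<Arg> &Args) {
  for (const Arg &A : Args) {
    if (A.IsSwiftError)
      continue; // skip up front instead of undoing the rewrite afterwards
    std::cout << "lowering " << A.Name << " via a no-op copy\n";
  }
}

int main() {
  lowerIncomingArguments({{"x", false}, {"err", true}, {"y", false}});
  return 0;
}
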
diff --git a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
index ea74c777e1e2..ed74b3e4fa19 100644
--- a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
+++ b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
@@ -10,6 +10,7 @@
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -73,7 +74,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
SlotIndex blockStartIndex(&indexList.back(), SlotIndex::Slot_Block);
for (MachineInstr &MI : MBB) {
- if (MI.isDebugValue())
+ if (MI.isDebugInstr())
continue;
// Insert a store index for the instr.
@@ -94,9 +95,9 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
}
// Sort the Idx2MBBMap
- std::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare());
+ llvm::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare());
- DEBUG(mf->print(dbgs(), this));
+ LLVM_DEBUG(mf->print(dbgs(), this));
// And we're done!
return false;
@@ -146,7 +147,7 @@ void SlotIndexes::removeSingleMachineInstrFromMaps(MachineInstr &MI) {
void SlotIndexes::renumberIndexes() {
// Renumber updates the index of every element of the index list.
- DEBUG(dbgs() << "\n*** Renumbering SlotIndexes ***\n");
+ LLVM_DEBUG(dbgs() << "\n*** Renumbering SlotIndexes ***\n");
++NumGlobalRenum;
unsigned index = 0;
@@ -173,8 +174,8 @@ void SlotIndexes::renumberIndexes(IndexList::iterator curItr) {
// If the next index is bigger, we have caught up.
} while (curItr != indexList.end() && curItr->getIndex() <= index);
- DEBUG(dbgs() << "\n*** Renumbered SlotIndexes " << startItr->getIndex() << '-'
- << index << " ***\n");
+ LLVM_DEBUG(dbgs() << "\n*** Renumbered SlotIndexes " << startItr->getIndex()
+ << '-' << index << " ***\n");
++NumLocalRenum;
}
@@ -244,7 +245,7 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB,
for (MachineBasicBlock::iterator I = End; I != Begin;) {
--I;
MachineInstr &MI = *I;
- if (!MI.isDebugValue() && mi2iMap.find(&MI) == mi2iMap.end())
+ if (!MI.isDebugInstr() && mi2iMap.find(&MI) == mi2iMap.end())
insertMachineInstrInMaps(MI);
}
}
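
// Editorial sketch (not part of the patch): several hunks switch std::sort to
// llvm::sort. One motivation for such a wrapper is to shuffle the range before sorting
// in checking builds, so results that depend on the input order of equal keys show up
// as failures. This is a generic, stand-alone version of that idea, not LLVM's
// implementation.
#include <algorithm>
#include <iostream>
#include <random>
#include <vector>

template <typename Iter, typename Compare>
void checked_sort(Iter Begin, Iter End, Compare Cmp) {
#ifdef EXPENSIVE_CHECKS
  // Randomize first so hidden dependence on the original order is exposed.
  std::mt19937 Rng(std::random_device{}());
  std::shuffle(Begin, End, Rng);
#endif
  std::sort(Begin, End, Cmp);
}

int main() {
  std::vector<int> V = {3, 1, 2};
  checked_sort(V.begin(), V.end(), [](int A, int B) { return A < B; });
  for (int X : V)
    std::cout << X << ' ';
  std::cout << '\n';
  return 0;
}
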
diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp
index b989b54d4190..f6786b30b21c 100644
--- a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp
+++ b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp
@@ -246,7 +246,7 @@ void SpillPlacement::activate(unsigned n) {
}
}
-/// \brief Set the threshold for a given entry frequency.
+/// Set the threshold for a given entry frequency.
///
/// Set the threshold relative to \c Entry. Since the threshold is used as a
/// bound on the open interval (-Threshold;Threshold), 1 is the minimum
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm/lib/CodeGen/SplitKit.cpp
index 1628ee28b8a3..d639f4475301 100644
--- a/contrib/llvm/lib/CodeGen/SplitKit.cpp
+++ b/contrib/llvm/lib/CodeGen/SplitKit.cpp
@@ -39,6 +39,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/Support/Allocator.h"
@@ -191,7 +192,7 @@ void SplitAnalysis::analyzeUses() {
// I am looking at you, RegisterCoalescer!
DidRepairRange = true;
++NumRepairs;
- DEBUG(dbgs() << "*** Fixing inconsistent live interval! ***\n");
+ LLVM_DEBUG(dbgs() << "*** Fixing inconsistent live interval! ***\n");
const_cast<LiveIntervals&>(LIS)
.shrinkToUses(const_cast<LiveInterval*>(CurLI));
UseBlocks.clear();
@@ -201,10 +202,9 @@ void SplitAnalysis::analyzeUses() {
assert(fixed && "Couldn't fix broken live interval");
}
- DEBUG(dbgs() << "Analyze counted "
- << UseSlots.size() << " instrs in "
- << UseBlocks.size() << " blocks, through "
- << NumThroughBlocks << " blocks.\n");
+ LLVM_DEBUG(dbgs() << "Analyze counted " << UseSlots.size() << " instrs in "
+ << UseBlocks.size() << " blocks, through "
+ << NumThroughBlocks << " blocks.\n");
}
/// calcLiveBlockInfo - Fill the LiveBlocks array with information about blocks
@@ -685,20 +685,20 @@ unsigned SplitEditor::openIntv() {
void SplitEditor::selectIntv(unsigned Idx) {
assert(Idx != 0 && "Cannot select the complement interval");
assert(Idx < Edit->size() && "Can only select previously opened interval");
- DEBUG(dbgs() << " selectIntv " << OpenIdx << " -> " << Idx << '\n');
+ LLVM_DEBUG(dbgs() << " selectIntv " << OpenIdx << " -> " << Idx << '\n');
OpenIdx = Idx;
}
SlotIndex SplitEditor::enterIntvBefore(SlotIndex Idx) {
assert(OpenIdx && "openIntv not called before enterIntvBefore");
- DEBUG(dbgs() << " enterIntvBefore " << Idx);
+ LLVM_DEBUG(dbgs() << " enterIntvBefore " << Idx);
Idx = Idx.getBaseIndex();
VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);
if (!ParentVNI) {
- DEBUG(dbgs() << ": not live\n");
+ LLVM_DEBUG(dbgs() << ": not live\n");
return Idx;
}
- DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+ LLVM_DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
assert(MI && "enterIntvBefore called with invalid index");
@@ -708,14 +708,14 @@ SlotIndex SplitEditor::enterIntvBefore(SlotIndex Idx) {
SlotIndex SplitEditor::enterIntvAfter(SlotIndex Idx) {
assert(OpenIdx && "openIntv not called before enterIntvAfter");
- DEBUG(dbgs() << " enterIntvAfter " << Idx);
+ LLVM_DEBUG(dbgs() << " enterIntvAfter " << Idx);
Idx = Idx.getBoundaryIndex();
VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);
if (!ParentVNI) {
- DEBUG(dbgs() << ": not live\n");
+ LLVM_DEBUG(dbgs() << ": not live\n");
return Idx;
}
- DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+ LLVM_DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
assert(MI && "enterIntvAfter called with invalid index");
@@ -728,18 +728,18 @@ SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) {
assert(OpenIdx && "openIntv not called before enterIntvAtEnd");
SlotIndex End = LIS.getMBBEndIdx(&MBB);
SlotIndex Last = End.getPrevSlot();
- DEBUG(dbgs() << " enterIntvAtEnd " << printMBBReference(MBB) << ", "
- << Last);
+ LLVM_DEBUG(dbgs() << " enterIntvAtEnd " << printMBBReference(MBB) << ", "
+ << Last);
VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Last);
if (!ParentVNI) {
- DEBUG(dbgs() << ": not live\n");
+ LLVM_DEBUG(dbgs() << ": not live\n");
return End;
}
- DEBUG(dbgs() << ": valno " << ParentVNI->id);
+ LLVM_DEBUG(dbgs() << ": valno " << ParentVNI->id);
VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Last, MBB,
SA.getLastSplitPointIter(&MBB));
RegAssign.insert(VNI->def, End, OpenIdx);
- DEBUG(dump());
+ LLVM_DEBUG(dump());
return VNI->def;
}
@@ -750,23 +750,23 @@ void SplitEditor::useIntv(const MachineBasicBlock &MBB) {
void SplitEditor::useIntv(SlotIndex Start, SlotIndex End) {
assert(OpenIdx && "openIntv not called before useIntv");
- DEBUG(dbgs() << " useIntv [" << Start << ';' << End << "):");
+ LLVM_DEBUG(dbgs() << " useIntv [" << Start << ';' << End << "):");
RegAssign.insert(Start, End, OpenIdx);
- DEBUG(dump());
+ LLVM_DEBUG(dump());
}
SlotIndex SplitEditor::leaveIntvAfter(SlotIndex Idx) {
assert(OpenIdx && "openIntv not called before leaveIntvAfter");
- DEBUG(dbgs() << " leaveIntvAfter " << Idx);
+ LLVM_DEBUG(dbgs() << " leaveIntvAfter " << Idx);
// The interval must be live beyond the instruction at Idx.
SlotIndex Boundary = Idx.getBoundaryIndex();
VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Boundary);
if (!ParentVNI) {
- DEBUG(dbgs() << ": not live\n");
+ LLVM_DEBUG(dbgs() << ": not live\n");
return Boundary.getNextSlot();
}
- DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+ LLVM_DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
MachineInstr *MI = LIS.getInstructionFromIndex(Boundary);
assert(MI && "No instruction at index");
@@ -788,16 +788,16 @@ SlotIndex SplitEditor::leaveIntvAfter(SlotIndex Idx) {
SlotIndex SplitEditor::leaveIntvBefore(SlotIndex Idx) {
assert(OpenIdx && "openIntv not called before leaveIntvBefore");
- DEBUG(dbgs() << " leaveIntvBefore " << Idx);
+ LLVM_DEBUG(dbgs() << " leaveIntvBefore " << Idx);
// The interval must be live into the instruction at Idx.
Idx = Idx.getBaseIndex();
VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);
if (!ParentVNI) {
- DEBUG(dbgs() << ": not live\n");
+ LLVM_DEBUG(dbgs() << ": not live\n");
return Idx.getNextSlot();
}
- DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+ LLVM_DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
assert(MI && "No instruction at index");
@@ -808,19 +808,19 @@ SlotIndex SplitEditor::leaveIntvBefore(SlotIndex Idx) {
SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
assert(OpenIdx && "openIntv not called before leaveIntvAtTop");
SlotIndex Start = LIS.getMBBStartIdx(&MBB);
- DEBUG(dbgs() << " leaveIntvAtTop " << printMBBReference(MBB) << ", "
- << Start);
+ LLVM_DEBUG(dbgs() << " leaveIntvAtTop " << printMBBReference(MBB) << ", "
+ << Start);
VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start);
if (!ParentVNI) {
- DEBUG(dbgs() << ": not live\n");
+ LLVM_DEBUG(dbgs() << ": not live\n");
return Start;
}
VNInfo *VNI = defFromParent(0, ParentVNI, Start, MBB,
MBB.SkipPHIsLabelsAndDebug(MBB.begin()));
RegAssign.insert(Start, VNI->def, OpenIdx);
- DEBUG(dump());
+ LLVM_DEBUG(dump());
return VNI->def;
}
@@ -835,9 +835,9 @@ void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) {
// The complement interval will be extended as needed by LRCalc.extend().
if (ParentVNI)
forceRecompute(0, *ParentVNI);
- DEBUG(dbgs() << " overlapIntv [" << Start << ';' << End << "):");
+ LLVM_DEBUG(dbgs() << " overlapIntv [" << Start << ';' << End << "):");
RegAssign.insert(Start, End, OpenIdx);
- DEBUG(dump());
+ LLVM_DEBUG(dump());
}
//===----------------------------------------------------------------------===//
@@ -846,7 +846,7 @@ void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) {
void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) {
LiveInterval *LI = &LIS.getInterval(Edit->get(0));
- DEBUG(dbgs() << "Removing " << Copies.size() << " back-copies.\n");
+ LLVM_DEBUG(dbgs() << "Removing " << Copies.size() << " back-copies.\n");
RegAssignMap::iterator AssignI;
AssignI.setMap(RegAssign);
@@ -859,9 +859,9 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) {
MachineBasicBlock::iterator MBBI(MI);
bool AtBegin;
do AtBegin = MBBI == MBB->begin();
- while (!AtBegin && (--MBBI)->isDebugValue());
+ while (!AtBegin && (--MBBI)->isDebugInstr());
- DEBUG(dbgs() << "Removing " << Def << '\t' << *MI);
+ LLVM_DEBUG(dbgs() << "Removing " << Def << '\t' << *MI);
LIS.removeVRegDefAt(*LI, Def);
LIS.RemoveMachineInstrFromMaps(*MI);
MI->eraseFromParent();
@@ -876,11 +876,12 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) {
continue;
unsigned RegIdx = AssignI.value();
if (AtBegin || !MBBI->readsVirtualRegister(Edit->getReg())) {
- DEBUG(dbgs() << " cannot find simple kill of RegIdx " << RegIdx << '\n');
+ LLVM_DEBUG(dbgs() << " cannot find simple kill of RegIdx " << RegIdx
+ << '\n');
forceRecompute(RegIdx, *Edit->getParent().getVNInfoAt(Def));
} else {
SlotIndex Kill = LIS.getInstructionIndex(*MBBI).getRegSlot();
- DEBUG(dbgs() << " move kill to " << Kill << '\t' << *MBBI);
+ LLVM_DEBUG(dbgs() << " move kill to " << Kill << '\t' << *MBBI);
AssignI.setStop(Kill);
}
}
@@ -907,15 +908,17 @@ SplitEditor::findShallowDominator(MachineBasicBlock *MBB,
// MBB isn't in a loop, it doesn't get any better. All dominators have a
// higher frequency by definition.
if (!Loop) {
- DEBUG(dbgs() << "Def in " << printMBBReference(*DefMBB) << " dominates "
- << printMBBReference(*MBB) << " at depth 0\n");
+ LLVM_DEBUG(dbgs() << "Def in " << printMBBReference(*DefMBB)
+ << " dominates " << printMBBReference(*MBB)
+ << " at depth 0\n");
return MBB;
}
// We'll never be able to exit the DefLoop.
if (Loop == DefLoop) {
- DEBUG(dbgs() << "Def in " << printMBBReference(*DefMBB) << " dominates "
- << printMBBReference(*MBB) << " in the same loop\n");
+ LLVM_DEBUG(dbgs() << "Def in " << printMBBReference(*DefMBB)
+ << " dominates " << printMBBReference(*MBB)
+ << " in the same loop\n");
return MBB;
}
@@ -924,8 +927,9 @@ SplitEditor::findShallowDominator(MachineBasicBlock *MBB,
if (Depth < BestDepth) {
BestMBB = MBB;
BestDepth = Depth;
- DEBUG(dbgs() << "Def in " << printMBBReference(*DefMBB) << " dominates "
- << printMBBReference(*MBB) << " at depth " << Depth << '\n');
+ LLVM_DEBUG(dbgs() << "Def in " << printMBBReference(*DefMBB)
+ << " dominates " << printMBBReference(*MBB)
+ << " at depth " << Depth << '\n');
}
// Leave loop by going to the immediate dominator of the loop header.
@@ -1031,14 +1035,14 @@ void SplitEditor::hoistCopies() {
// instruction in the complement range. All other copies of ParentVNI
// should be eliminated.
if (VNI->def == ParentVNI->def) {
- DEBUG(dbgs() << "Direct complement def at " << VNI->def << '\n');
+ LLVM_DEBUG(dbgs() << "Direct complement def at " << VNI->def << '\n');
Dom = DomPair(ValMBB, VNI->def);
continue;
}
// Skip the singly mapped values. There is nothing to gain from hoisting a
// single back-copy.
if (Values.lookup(std::make_pair(0, ParentVNI->id)).getPointer()) {
- DEBUG(dbgs() << "Single complement def at " << VNI->def << '\n');
+ LLVM_DEBUG(dbgs() << "Single complement def at " << VNI->def << '\n');
continue;
}
@@ -1062,10 +1066,11 @@ void SplitEditor::hoistCopies() {
Costs[ParentVNI->id] += MBFI.getBlockFreq(ValMBB);
}
- DEBUG(dbgs() << "Multi-mapped complement " << VNI->id << '@' << VNI->def
- << " for parent " << ParentVNI->id << '@' << ParentVNI->def
- << " hoist to " << printMBBReference(*Dom.first) << ' '
- << Dom.second << '\n');
+ LLVM_DEBUG(dbgs() << "Multi-mapped complement " << VNI->id << '@'
+ << VNI->def << " for parent " << ParentVNI->id << '@'
+ << ParentVNI->def << " hoist to "
+ << printMBBReference(*Dom.first) << ' ' << Dom.second
+ << '\n');
}
// Insert the hoisted copies.
@@ -1118,7 +1123,7 @@ bool SplitEditor::transferValues() {
bool Skipped = false;
RegAssignMap::const_iterator AssignI = RegAssign.begin();
for (const LiveRange::Segment &S : Edit->getParent()) {
- DEBUG(dbgs() << " blit " << S << ':');
+ LLVM_DEBUG(dbgs() << " blit " << S << ':');
VNInfo *ParentVNI = S.valno;
// RegAssign has holes where RegIdx 0 should be used.
SlotIndex Start = S.start;
@@ -1140,14 +1145,14 @@ bool SplitEditor::transferValues() {
}
// The interval [Start;End) is continuously mapped to RegIdx, ParentVNI.
- DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx
- << '(' << printReg(Edit->get(RegIdx)) << ')');
+ LLVM_DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx << '('
+ << printReg(Edit->get(RegIdx)) << ')');
LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx));
// Check for a simply defined value that can be blitted directly.
ValueForcePair VFP = Values.lookup(std::make_pair(RegIdx, ParentVNI->id));
if (VNInfo *VNI = VFP.getPointer()) {
- DEBUG(dbgs() << ':' << VNI->id);
+ LLVM_DEBUG(dbgs() << ':' << VNI->id);
LI.addSegment(LiveInterval::Segment(Start, End, VNI));
Start = End;
continue;
@@ -1155,7 +1160,7 @@ bool SplitEditor::transferValues() {
// Skip values with forced recomputation.
if (VFP.getInt()) {
- DEBUG(dbgs() << "(recalc)");
+ LLVM_DEBUG(dbgs() << "(recalc)");
Skipped = true;
Start = End;
continue;
@@ -1174,7 +1179,7 @@ bool SplitEditor::transferValues() {
if (Start != BlockStart) {
VNInfo *VNI = LI.extendInBlock(BlockStart, std::min(BlockEnd, End));
assert(VNI && "Missing def for complex mapped value");
- DEBUG(dbgs() << ':' << VNI->id << "*" << printMBBReference(*MBB));
+ LLVM_DEBUG(dbgs() << ':' << VNI->id << "*" << printMBBReference(*MBB));
// MBB has its own def. Is it also live-out?
if (BlockEnd <= End)
LRC.setLiveOutValue(&*MBB, VNI);
@@ -1187,7 +1192,7 @@ bool SplitEditor::transferValues() {
// Handle the live-in blocks covered by [Start;End).
assert(Start <= BlockStart && "Expected live-in block");
while (BlockStart < End) {
- DEBUG(dbgs() << ">" << printMBBReference(*MBB));
+ LLVM_DEBUG(dbgs() << ">" << printMBBReference(*MBB));
BlockEnd = LIS.getMBBEndIdx(&*MBB);
if (BlockStart == ParentVNI->def) {
// This block has the def of a parent PHI, so it isn't live-in.
@@ -1212,7 +1217,7 @@ bool SplitEditor::transferValues() {
}
Start = End;
} while (Start != S.end);
- DEBUG(dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << '\n');
}
LRCalc[0].calculateValues();
@@ -1314,7 +1319,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
++RI;
// LiveDebugVariables should have handled all DBG_VALUE instructions.
if (MI->isDebugValue()) {
- DEBUG(dbgs() << "Zapping " << *MI);
+ LLVM_DEBUG(dbgs() << "Zapping " << *MI);
MO.setReg(0);
continue;
}
@@ -1330,8 +1335,8 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
unsigned RegIdx = RegAssign.lookup(Idx);
LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx));
MO.setReg(LI.reg);
- DEBUG(dbgs() << " rewr " << printMBBReference(*MI->getParent()) << '\t'
- << Idx << ':' << RegIdx << '\t' << *MI);
+ LLVM_DEBUG(dbgs() << " rewr " << printMBBReference(*MI->getParent())
+ << '\t' << Idx << ':' << RegIdx << '\t' << *MI);
// Extend liveness to Idx if the instruction reads reg.
if (!ExtendRanges || MO.isUndef())
@@ -1416,7 +1421,7 @@ void SplitEditor::deleteRematVictims() {
if (!MI->allDefsAreDead())
continue;
- DEBUG(dbgs() << "All defs dead: " << *MI);
+ LLVM_DEBUG(dbgs() << "All defs dead: " << *MI);
Dead.push_back(MI);
}
}
@@ -1598,9 +1603,9 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum,
SlotIndex Start, Stop;
std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(MBBNum);
- DEBUG(dbgs() << "%bb." << MBBNum << " [" << Start << ';' << Stop << ") intf "
- << LeaveBefore << '-' << EnterAfter << ", live-through "
- << IntvIn << " -> " << IntvOut);
+ LLVM_DEBUG(dbgs() << "%bb." << MBBNum << " [" << Start << ';' << Stop
+ << ") intf " << LeaveBefore << '-' << EnterAfter
+ << ", live-through " << IntvIn << " -> " << IntvOut);
assert((IntvIn || IntvOut) && "Use splitSingleBlock for isolated blocks");
@@ -1611,7 +1616,7 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum,
MachineBasicBlock *MBB = VRM.getMachineFunction().getBlockNumbered(MBBNum);
if (!IntvOut) {
- DEBUG(dbgs() << ", spill on entry.\n");
+ LLVM_DEBUG(dbgs() << ", spill on entry.\n");
//
// <<<<<<<<< Possible LeaveBefore interference.
// |-----------| Live through.
@@ -1625,7 +1630,7 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum,
}
if (!IntvIn) {
- DEBUG(dbgs() << ", reload on exit.\n");
+ LLVM_DEBUG(dbgs() << ", reload on exit.\n");
//
// >>>>>>> Possible EnterAfter interference.
// |-----------| Live through.
@@ -1639,7 +1644,7 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum,
}
if (IntvIn == IntvOut && !LeaveBefore && !EnterAfter) {
- DEBUG(dbgs() << ", straight through.\n");
+ LLVM_DEBUG(dbgs() << ", straight through.\n");
//
// |-----------| Live through.
// ------------- Straight through, same intv, no interference.
@@ -1655,7 +1660,7 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum,
if (IntvIn != IntvOut && (!LeaveBefore || !EnterAfter ||
LeaveBefore.getBaseIndex() > EnterAfter.getBoundaryIndex())) {
- DEBUG(dbgs() << ", switch avoiding interference.\n");
+ LLVM_DEBUG(dbgs() << ", switch avoiding interference.\n");
//
// >>>> <<<< Non-overlapping EnterAfter/LeaveBefore interference.
// |-----------| Live through.
@@ -1676,7 +1681,7 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum,
return;
}
- DEBUG(dbgs() << ", create local intv for interference.\n");
+ LLVM_DEBUG(dbgs() << ", create local intv for interference.\n");
//
// >>><><><><<<< Overlapping EnterAfter/LeaveBefore interference.
// |-----------| Live through.
@@ -1700,17 +1705,18 @@ void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI,
SlotIndex Start, Stop;
std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
- DEBUG(dbgs() << printMBBReference(*BI.MBB) << " [" << Start << ';' << Stop
- << "), uses " << BI.FirstInstr << '-' << BI.LastInstr
- << ", reg-in " << IntvIn << ", leave before " << LeaveBefore
- << (BI.LiveOut ? ", stack-out" : ", killed in block"));
+ LLVM_DEBUG(dbgs() << printMBBReference(*BI.MBB) << " [" << Start << ';'
+ << Stop << "), uses " << BI.FirstInstr << '-'
+ << BI.LastInstr << ", reg-in " << IntvIn
+ << ", leave before " << LeaveBefore
+ << (BI.LiveOut ? ", stack-out" : ", killed in block"));
assert(IntvIn && "Must have register in");
assert(BI.LiveIn && "Must be live-in");
assert((!LeaveBefore || LeaveBefore > Start) && "Bad interference");
if (!BI.LiveOut && (!LeaveBefore || LeaveBefore >= BI.LastInstr)) {
- DEBUG(dbgs() << " before interference.\n");
+ LLVM_DEBUG(dbgs() << " before interference.\n");
//
// <<< Interference after kill.
// |---o---x | Killed in block.
@@ -1735,13 +1741,13 @@ void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI,
// \_____ Stack interval is live-out.
//
if (BI.LastInstr < LSP) {
- DEBUG(dbgs() << ", spill after last use before interference.\n");
+ LLVM_DEBUG(dbgs() << ", spill after last use before interference.\n");
selectIntv(IntvIn);
SlotIndex Idx = leaveIntvAfter(BI.LastInstr);
useIntv(Start, Idx);
assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
} else {
- DEBUG(dbgs() << ", spill before last split point.\n");
+ LLVM_DEBUG(dbgs() << ", spill before last split point.\n");
selectIntv(IntvIn);
SlotIndex Idx = leaveIntvBefore(LSP);
overlapIntv(Idx, BI.LastInstr);
@@ -1756,7 +1762,7 @@ void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI,
// different register.
unsigned LocalIntv = openIntv();
(void)LocalIntv;
- DEBUG(dbgs() << ", creating local interval " << LocalIntv << ".\n");
+ LLVM_DEBUG(dbgs() << ", creating local interval " << LocalIntv << ".\n");
if (!BI.LiveOut || BI.LastInstr < LSP) {
//
@@ -1792,10 +1798,11 @@ void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,
SlotIndex Start, Stop;
std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
- DEBUG(dbgs() << printMBBReference(*BI.MBB) << " [" << Start << ';' << Stop
- << "), uses " << BI.FirstInstr << '-' << BI.LastInstr
- << ", reg-out " << IntvOut << ", enter after " << EnterAfter
- << (BI.LiveIn ? ", stack-in" : ", defined in block"));
+ LLVM_DEBUG(dbgs() << printMBBReference(*BI.MBB) << " [" << Start << ';'
+ << Stop << "), uses " << BI.FirstInstr << '-'
+ << BI.LastInstr << ", reg-out " << IntvOut
+ << ", enter after " << EnterAfter
+ << (BI.LiveIn ? ", stack-in" : ", defined in block"));
SlotIndex LSP = SA.getLastSplitPoint(BI.MBB->getNumber());
@@ -1804,7 +1811,7 @@ void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,
assert((!EnterAfter || EnterAfter < LSP) && "Bad interference");
if (!BI.LiveIn && (!EnterAfter || EnterAfter <= BI.FirstInstr)) {
- DEBUG(dbgs() << " after interference.\n");
+ LLVM_DEBUG(dbgs() << " after interference.\n");
//
// >>>> Interference before def.
// | o---o---| Defined in block.
@@ -1816,7 +1823,7 @@ void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,
}
if (!EnterAfter || EnterAfter < BI.FirstInstr.getBaseIndex()) {
- DEBUG(dbgs() << ", reload after interference.\n");
+ LLVM_DEBUG(dbgs() << ", reload after interference.\n");
//
// >>>> Interference before def.
// |---o---o---| Live-through, stack-in.
@@ -1832,7 +1839,7 @@ void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,
// The interference is overlapping somewhere we wanted to use IntvOut. That
// means we need to create a local interval that can be allocated a
// different register.
- DEBUG(dbgs() << ", interference overlaps uses.\n");
+ LLVM_DEBUG(dbgs() << ", interference overlaps uses.\n");
//
// >>>>>>> Interference overlapping uses.
// |---o---o---| Live-through, stack-in.
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm/lib/CodeGen/SplitKit.h
index 2dafaf587801..ed664e4f81a3 100644
--- a/contrib/llvm/lib/CodeGen/SplitKit.h
+++ b/contrib/llvm/lib/CodeGen/SplitKit.h
@@ -421,7 +421,7 @@ private:
SlotIndex buildSingleSubRegCopy(unsigned FromReg, unsigned ToReg,
MachineBasicBlock &MB, MachineBasicBlock::iterator InsertBefore,
- unsigned SubIdx, LiveInterval &DestLI, bool Late, SlotIndex PrevCopy);
+ unsigned SubIdx, LiveInterval &DestLI, bool Late, SlotIndex Def);
public:
/// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
diff --git a/contrib/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm/lib/CodeGen/StackColoring.cpp
index 608845498b48..81a41970f9e2 100644
--- a/contrib/llvm/lib/CodeGen/StackColoring.cpp
+++ b/contrib/llvm/lib/CodeGen/StackColoring.cpp
@@ -39,9 +39,9 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SlotIndexes.h"
-#include "llvm/CodeGen/StackProtector.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
@@ -422,9 +422,6 @@ class StackColoring : public MachineFunctionPass {
/// SlotIndex analysis object.
SlotIndexes *Indexes;
- /// The stack protector object.
- StackProtector *SP;
-
/// The list of lifetime markers found. These markers are to be removed
/// once the coloring is done.
SmallVector<MachineInstr*, 8> Markers;
@@ -448,7 +445,7 @@ public:
}
void getAnalysisUsage(AnalysisUsage &AU) const override;
- bool runOnMachineFunction(MachineFunction &MF) override;
+ bool runOnMachineFunction(MachineFunction &Func) override;
private:
/// Used in collectMarkers
@@ -523,13 +520,11 @@ char &llvm::StackColoringID = StackColoring::ID;
INITIALIZE_PASS_BEGIN(StackColoring, DEBUG_TYPE,
"Merge disjoint stack slots", false, false)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
-INITIALIZE_PASS_DEPENDENCY(StackProtector)
INITIALIZE_PASS_END(StackColoring, DEBUG_TYPE,
"Merge disjoint stack slots", false, false)
void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<SlotIndexes>();
- AU.addRequired<StackProtector>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -600,12 +595,12 @@ bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI,
isStart = false;
return true;
}
- if (! applyFirstUse(Slot)) {
+ if (!applyFirstUse(Slot)) {
isStart = true;
return true;
}
} else if (LifetimeStartOnFirstUse && !ProtectFromEscapedAllocas) {
- if (! MI.isDebugValue()) {
+ if (!MI.isDebugInstr()) {
bool found = false;
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isFI())
@@ -672,13 +667,13 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
}
const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
if (Allocation) {
- DEBUG(dbgs() << "Found a lifetime ");
- DEBUG(dbgs() << (MI.getOpcode() == TargetOpcode::LIFETIME_START
- ? "start"
- : "end"));
- DEBUG(dbgs() << " marker for slot #" << Slot);
- DEBUG(dbgs() << " with allocation: " << Allocation->getName()
- << "\n");
+ LLVM_DEBUG(dbgs() << "Found a lifetime ");
+ LLVM_DEBUG(dbgs() << (MI.getOpcode() == TargetOpcode::LIFETIME_START
+ ? "start"
+ : "end"));
+ LLVM_DEBUG(dbgs() << " marker for slot #" << Slot);
+ LLVM_DEBUG(dbgs()
+ << " with allocation: " << Allocation->getName() << "\n");
}
Markers.push_back(&MI);
MarkersFound += 1;
@@ -707,7 +702,7 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
for (unsigned slot = 0; slot < NumSlot; ++slot)
if (NumStartLifetimes[slot] > 1 || NumEndLifetimes[slot] > 1)
ConservativeSlots.set(slot);
- DEBUG(dumpBV("Conservative slots", ConservativeSlots));
+ LLVM_DEBUG(dumpBV("Conservative slots", ConservativeSlots));
// Step 2: compute begin/end sets for each block
@@ -738,14 +733,16 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
BlockInfo.End.set(Slot);
} else {
for (auto Slot : slots) {
- DEBUG(dbgs() << "Found a use of slot #" << Slot);
- DEBUG(dbgs() << " at " << printMBBReference(*MBB) << " index ");
- DEBUG(Indexes->getInstructionIndex(MI).print(dbgs()));
+ LLVM_DEBUG(dbgs() << "Found a use of slot #" << Slot);
+ LLVM_DEBUG(dbgs()
+ << " at " << printMBBReference(*MBB) << " index ");
+ LLVM_DEBUG(Indexes->getInstructionIndex(MI).print(dbgs()));
const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
if (Allocation) {
- DEBUG(dbgs() << " with allocation: "<< Allocation->getName());
+ LLVM_DEBUG(dbgs()
+ << " with allocation: " << Allocation->getName());
}
- DEBUG(dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "\n");
if (BlockInfo.End.test(Slot)) {
BlockInfo.End.reset(Slot);
}
@@ -779,8 +776,11 @@ void StackColoring::calculateLocalLiveness() {
for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(),
PE = BB->pred_end(); PI != PE; ++PI) {
LivenessMap::const_iterator I = BlockLiveness.find(*PI);
- assert(I != BlockLiveness.end() && "Predecessor not found");
- LocalLiveIn |= I->second.LiveOut;
+ // PR37130: transformations prior to stack coloring can
+ // sometimes leave behind statically unreachable blocks; these
+ // can be safely skipped here.
+ if (I != BlockLiveness.end())
+ LocalLiveIn |= I->second.LiveOut;
}
// Compute LiveOut by subtracting out lifetimes that end in this
@@ -880,7 +880,7 @@ bool StackColoring::removeAllMarkers() {
}
Markers.clear();
- DEBUG(dbgs()<<"Removed "<<Count<<" markers.\n");
+ LLVM_DEBUG(dbgs() << "Removed " << Count << " markers.\n");
return Count;
}
@@ -894,8 +894,8 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
if (!VI.Var)
continue;
if (SlotRemap.count(VI.Slot)) {
- DEBUG(dbgs() << "Remapping debug info for ["
- << cast<DILocalVariable>(VI.Var)->getName() << "].\n");
+ LLVM_DEBUG(dbgs() << "Remapping debug info for ["
+ << cast<DILocalVariable>(VI.Var)->getName() << "].\n");
VI.Slot = SlotRemap[VI.Slot];
FixedDbg++;
}
@@ -930,9 +930,17 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
MergedAllocas.insert(From);
MergedAllocas.insert(To);
- // Allow the stack protector to adjust its value map to account for the
- // upcoming replacement.
- SP->adjustForColoring(From, To);
+ // Transfer the stack protector layout tag, but make sure that SSPLK_AddrOf
+ // does not overwrite SSPLK_SmallArray or SSPLK_LargeArray, and make sure
+ // that SSPLK_SmallArray does not overwrite SSPLK_LargeArray.
+ MachineFrameInfo::SSPLayoutKind FromKind
+ = MFI->getObjectSSPLayout(SI.first);
+ MachineFrameInfo::SSPLayoutKind ToKind = MFI->getObjectSSPLayout(SI.second);
+ if (FromKind != MachineFrameInfo::SSPLK_None &&
+ (ToKind == MachineFrameInfo::SSPLK_None ||
+ (ToKind != MachineFrameInfo::SSPLK_LargeArray &&
+ FromKind != MachineFrameInfo::SSPLK_AddrOf)))
+ MFI->setObjectSSPLayout(SI.second, FromKind);
// The new alloca might not be valid in a llvm.dbg.declare for this
// variable, so undef out the use to make the verifier happy.
@@ -993,13 +1001,13 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
// the calculated range then it means that the alloca usage moved
// outside of the lifetime markers, or that the user has a bug.
// NOTE: Alloca address calculations which happen outside the lifetime
- // zone are are okay, despite the fact that we don't have a good way
+ // zone are okay, despite the fact that we don't have a good way
// for validating all of the usages of the calculation.
#ifndef NDEBUG
bool TouchesMemory = I.mayLoad() || I.mayStore();
// If we *don't* protect the user from escaped allocas, don't bother
// validating the instructions.
- if (!I.isDebugValue() && TouchesMemory && ProtectFromEscapedAllocas) {
+ if (!I.isDebugInstr() && TouchesMemory && ProtectFromEscapedAllocas) {
SlotIndex Index = Indexes->getInstructionIndex(I);
const LiveInterval *Interval = &*Intervals[FromSlot];
assert(Interval->find(Index) != Interval->end() &&
@@ -1064,16 +1072,16 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
SlotRemap.count(H.CatchObj.FrameIndex))
H.CatchObj.FrameIndex = SlotRemap[H.CatchObj.FrameIndex];
- DEBUG(dbgs()<<"Fixed "<<FixedMemOp<<" machine memory operands.\n");
- DEBUG(dbgs()<<"Fixed "<<FixedDbg<<" debug locations.\n");
- DEBUG(dbgs()<<"Fixed "<<FixedInstr<<" machine instructions.\n");
+ LLVM_DEBUG(dbgs() << "Fixed " << FixedMemOp << " machine memory operands.\n");
+ LLVM_DEBUG(dbgs() << "Fixed " << FixedDbg << " debug locations.\n");
+ LLVM_DEBUG(dbgs() << "Fixed " << FixedInstr << " machine instructions.\n");
}
void StackColoring::removeInvalidSlotRanges() {
for (MachineBasicBlock &BB : *MF)
for (MachineInstr &I : BB) {
if (I.getOpcode() == TargetOpcode::LIFETIME_START ||
- I.getOpcode() == TargetOpcode::LIFETIME_END || I.isDebugValue())
+ I.getOpcode() == TargetOpcode::LIFETIME_END || I.isDebugInstr())
continue;
// Some intervals are suspicious! In some cases we find address
@@ -1104,7 +1112,7 @@ void StackColoring::removeInvalidSlotRanges() {
SlotIndex Index = Indexes->getInstructionIndex(I);
if (Interval->find(Index) == Interval->end()) {
Interval->clear();
- DEBUG(dbgs()<<"Invalidating range #"<<Slot<<"\n");
+ LLVM_DEBUG(dbgs() << "Invalidating range #" << Slot << "\n");
EscapedAllocas++;
}
}
@@ -1128,12 +1136,11 @@ void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap,
}
bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
- DEBUG(dbgs() << "********** Stack Coloring **********\n"
- << "********** Function: " << Func.getName() << '\n');
+ LLVM_DEBUG(dbgs() << "********** Stack Coloring **********\n"
+ << "********** Function: " << Func.getName() << '\n');
MF = &Func;
MFI = &MF->getFrameInfo();
Indexes = &getAnalysis<SlotIndexes>();
- SP = &getAnalysis<StackProtector>();
BlockLiveness.clear();
BasicBlocks.clear();
BasicBlockNumbering.clear();
@@ -1156,21 +1163,23 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
unsigned NumMarkers = collectMarkers(NumSlots);
unsigned TotalSize = 0;
- DEBUG(dbgs()<<"Found "<<NumMarkers<<" markers and "<<NumSlots<<" slots\n");
- DEBUG(dbgs()<<"Slot structure:\n");
+ LLVM_DEBUG(dbgs() << "Found " << NumMarkers << " markers and " << NumSlots
+ << " slots\n");
+ LLVM_DEBUG(dbgs() << "Slot structure:\n");
for (int i=0; i < MFI->getObjectIndexEnd(); ++i) {
- DEBUG(dbgs()<<"Slot #"<<i<<" - "<<MFI->getObjectSize(i)<<" bytes.\n");
+ LLVM_DEBUG(dbgs() << "Slot #" << i << " - " << MFI->getObjectSize(i)
+ << " bytes.\n");
TotalSize += MFI->getObjectSize(i);
}
- DEBUG(dbgs()<<"Total Stack size: "<<TotalSize<<" bytes\n\n");
+ LLVM_DEBUG(dbgs() << "Total Stack size: " << TotalSize << " bytes\n\n");
// Don't continue because there are not enough lifetime markers, or the
// stack is too small, or we are told not to optimize the slots.
if (NumMarkers < 2 || TotalSize < 16 || DisableColoring ||
skipFunction(Func.getFunction())) {
- DEBUG(dbgs()<<"Will not try to merge slots.\n");
+ LLVM_DEBUG(dbgs() << "Will not try to merge slots.\n");
return removeAllMarkers();
}
@@ -1183,12 +1192,12 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
// Calculate the liveness of each block.
calculateLocalLiveness();
- DEBUG(dbgs() << "Dataflow iterations: " << NumIterations << "\n");
- DEBUG(dump());
+ LLVM_DEBUG(dbgs() << "Dataflow iterations: " << NumIterations << "\n");
+ LLVM_DEBUG(dump());
// Propagate the liveness information.
calculateLiveIntervals(NumSlots);
- DEBUG(dumpIntervals());
+ LLVM_DEBUG(dumpIntervals());
// Search for allocas which are used outside of the declared lifetime
// markers.
@@ -1224,7 +1233,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
});
for (auto &s : LiveStarts)
- std::sort(s.begin(), s.end());
+ llvm::sort(s.begin(), s.end());
bool Changed = true;
while (Changed) {
@@ -1259,8 +1268,8 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
SlotRemap[SecondSlot] = FirstSlot;
SortedSlots[J] = -1;
- DEBUG(dbgs()<<"Merging #"<<FirstSlot<<" and slots #"<<
- SecondSlot<<" together.\n");
+ LLVM_DEBUG(dbgs() << "Merging #" << FirstSlot << " and slots #"
+ << SecondSlot << " together.\n");
unsigned MaxAlignment = std::max(MFI->getObjectAlignment(FirstSlot),
MFI->getObjectAlignment(SecondSlot));
@@ -1280,8 +1289,8 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
// Record statistics.
StackSpaceSaved += ReducedSize;
StackSlotMerged += RemovedSlots;
- DEBUG(dbgs()<<"Merge "<<RemovedSlots<<" slots. Saved "<<
- ReducedSize<<" bytes\n");
+ LLVM_DEBUG(dbgs() << "Merge " << RemovedSlots << " slots. Saved "
+ << ReducedSize << " bytes\n");
// Scan the entire function and update all machine operands that use frame
// indices to use the remapped frame index.
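
// Editorial sketch (not part of the patch): when StackColoring merges slot From into
// slot To, it now copies the stack-protector layout tag directly, with the rule that a
// weaker tag never overwrites a stronger one. The stand-alone code below encodes that
// same rule as "keep the stronger of the two kinds", using a local enum rather than
// LLVM's MachineFrameInfo.
#include <algorithm>
#include <cassert>

// Ordered from weakest to strongest protection requirement.
enum SSPKind { None = 0, AddrOf = 1, SmallArray = 2, LargeArray = 3 };

// Merge the tag of the removed slot into the surviving slot.
SSPKind mergeSSPKind(SSPKind From, SSPKind To) {
  return std::max(From, To); // never downgrade the surviving slot
}

int main() {
  assert(mergeSSPKind(SmallArray, None) == SmallArray);   // tag is transferred
  assert(mergeSSPKind(AddrOf, LargeArray) == LargeArray); // no downgrade
  assert(mergeSSPKind(LargeArray, SmallArray) == LargeArray);
  return 0;
}
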
diff --git a/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp b/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
index cc9af92c395f..00cf8070be5e 100644
--- a/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
+++ b/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
@@ -39,7 +39,7 @@ STATISTIC(NumBBsHaveNoStackmap, "Number of basic blocks with no stackmap");
STATISTIC(NumStackMaps, "Number of StackMaps visited");
namespace {
-/// \brief This pass calculates the liveness information for each basic block in
+/// This pass calculates the liveness information for each basic block in
/// a function and attaches the register live-out information to a patchpoint
/// intrinsic if present.
///
@@ -54,10 +54,10 @@ class StackMapLiveness : public MachineFunctionPass {
public:
static char ID;
- /// \brief Default construct and initialize the pass.
+ /// Default construct and initialize the pass.
StackMapLiveness();
- /// \brief Tell the pass manager which passes we depend on and what
+ /// Tell the pass manager which passes we depend on and what
/// information we preserve.
void getAnalysisUsage(AnalysisUsage &AU) const override;
@@ -66,17 +66,17 @@ public:
MachineFunctionProperties::Property::NoVRegs);
}
- /// \brief Calculate the liveness information for the given machine function.
+ /// Calculate the liveness information for the given machine function.
bool runOnMachineFunction(MachineFunction &MF) override;
private:
- /// \brief Performs the actual liveness calculation for the function.
+ /// Performs the actual liveness calculation for the function.
bool calculateLiveness(MachineFunction &MF);
- /// \brief Add the current register live set to the instruction.
+ /// Add the current register live set to the instruction.
void addLiveOutSetToMI(MachineFunction &MF, MachineInstr &MI);
- /// \brief Create a register mask and initialize it with the registers from
+ /// Create a register mask and initialize it with the registers from
/// the register live set.
uint32_t *createRegisterMask(MachineFunction &MF) const;
};
@@ -106,8 +106,8 @@ bool StackMapLiveness::runOnMachineFunction(MachineFunction &MF) {
if (!EnablePatchPointLiveness)
return false;
- DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: " << MF.getName()
- << " **********\n");
+ LLVM_DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: "
+ << MF.getName() << " **********\n");
TRI = MF.getSubtarget().getRegisterInfo();
++NumStackMapFuncVisited;
@@ -124,7 +124,7 @@ bool StackMapLiveness::calculateLiveness(MachineFunction &MF) {
bool HasChanged = false;
// For all basic blocks in the function.
for (auto &MBB : MF) {
- DEBUG(dbgs() << "****** BB " << MBB.getName() << " ******\n");
+ LLVM_DEBUG(dbgs() << "****** BB " << MBB.getName() << " ******\n");
LiveRegs.init(*TRI);
// FIXME: This should probably be addLiveOuts().
LiveRegs.addLiveOutsNoPristines(MBB);
@@ -138,7 +138,7 @@ bool StackMapLiveness::calculateLiveness(MachineFunction &MF) {
HasStackMap = true;
++NumStackMaps;
}
- DEBUG(dbgs() << " " << LiveRegs << " " << *I);
+ LLVM_DEBUG(dbgs() << " " << LiveRegs << " " << *I);
LiveRegs.stepBackward(*I);
}
++NumBBsVisited;
@@ -160,7 +160,7 @@ void StackMapLiveness::addLiveOutSetToMI(MachineFunction &MF,
/// register live set.
uint32_t *StackMapLiveness::createRegisterMask(MachineFunction &MF) const {
// The mask is owned and cleaned up by the Machine Function.
- uint32_t *Mask = MF.allocateRegisterMask(TRI->getNumRegs());
+ uint32_t *Mask = MF.allocateRegMask();
for (auto Reg : LiveRegs)
Mask[Reg / 32] |= 1U << (Reg % 32);
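
// Editorial sketch (not part of the patch): createRegisterMask packs the live register
// set into an array of 32-bit words, one bit per register. The stand-alone code below
// reproduces that packing; register numbers and the word layout are illustrative only.
#include <cstdint>
#include <iostream>
#include <vector>

std::vector<uint32_t> buildRegMask(const std::vector<unsigned> &LiveRegs,
                                   unsigned NumRegs) {
  std::vector<uint32_t> Mask((NumRegs + 31) / 32, 0);
  for (unsigned Reg : LiveRegs)
    Mask[Reg / 32] |= 1U << (Reg % 32); // same indexing as in the pass
  return Mask;
}

int main() {
  auto Mask = buildRegMask({0, 5, 37}, 64);
  std::cout << std::hex << Mask[0] << ' ' << Mask[1] << '\n'; // prints "21 20"
  return 0;
}
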
diff --git a/contrib/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm/lib/CodeGen/StackMaps.cpp
index e66a25bec911..19a191c01db9 100644
--- a/contrib/llvm/lib/CodeGen/StackMaps.cpp
+++ b/contrib/llvm/lib/CodeGen/StackMaps.cpp
@@ -268,11 +268,11 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const {
// in the list. Merge entries that refer to the same dwarf register and use
// the maximum size that needs to be spilled.
- std::sort(LiveOuts.begin(), LiveOuts.end(),
- [](const LiveOutReg &LHS, const LiveOutReg &RHS) {
- // Only sort by the dwarf register number.
- return LHS.DwarfRegNum < RHS.DwarfRegNum;
- });
+ llvm::sort(LiveOuts.begin(), LiveOuts.end(),
+ [](const LiveOutReg &LHS, const LiveOutReg &RHS) {
+ // Only sort by the dwarf register number.
+ return LHS.DwarfRegNum < RHS.DwarfRegNum;
+ });
for (auto I = LiveOuts.begin(), E = LiveOuts.end(); I != E; ++I) {
for (auto II = std::next(I); II != E; ++II) {
@@ -420,13 +420,13 @@ void StackMaps::emitStackmapHeader(MCStreamer &OS) {
OS.EmitIntValue(0, 2); // Reserved.
// Num functions.
- DEBUG(dbgs() << WSMP << "#functions = " << FnInfos.size() << '\n');
+ LLVM_DEBUG(dbgs() << WSMP << "#functions = " << FnInfos.size() << '\n');
OS.EmitIntValue(FnInfos.size(), 4);
// Num constants.
- DEBUG(dbgs() << WSMP << "#constants = " << ConstPool.size() << '\n');
+ LLVM_DEBUG(dbgs() << WSMP << "#constants = " << ConstPool.size() << '\n');
OS.EmitIntValue(ConstPool.size(), 4);
// Num callsites.
- DEBUG(dbgs() << WSMP << "#callsites = " << CSInfos.size() << '\n');
+ LLVM_DEBUG(dbgs() << WSMP << "#callsites = " << CSInfos.size() << '\n');
OS.EmitIntValue(CSInfos.size(), 4);
}
@@ -439,11 +439,11 @@ void StackMaps::emitStackmapHeader(MCStreamer &OS) {
/// }
void StackMaps::emitFunctionFrameRecords(MCStreamer &OS) {
// Function Frame records.
- DEBUG(dbgs() << WSMP << "functions:\n");
+ LLVM_DEBUG(dbgs() << WSMP << "functions:\n");
for (auto const &FR : FnInfos) {
- DEBUG(dbgs() << WSMP << "function addr: " << FR.first
- << " frame size: " << FR.second.StackSize
- << " callsite count: " << FR.second.RecordCount << '\n');
+ LLVM_DEBUG(dbgs() << WSMP << "function addr: " << FR.first
+ << " frame size: " << FR.second.StackSize
+ << " callsite count: " << FR.second.RecordCount << '\n');
OS.EmitSymbolValue(FR.first, 8);
OS.EmitIntValue(FR.second.StackSize, 8);
OS.EmitIntValue(FR.second.RecordCount, 8);
@@ -455,9 +455,9 @@ void StackMaps::emitFunctionFrameRecords(MCStreamer &OS) {
/// int64 : Constants[NumConstants]
void StackMaps::emitConstantPoolEntries(MCStreamer &OS) {
// Constant pool entries.
- DEBUG(dbgs() << WSMP << "constants:\n");
+ LLVM_DEBUG(dbgs() << WSMP << "constants:\n");
for (const auto &ConstEntry : ConstPool) {
- DEBUG(dbgs() << WSMP << ConstEntry.second << '\n');
+ LLVM_DEBUG(dbgs() << WSMP << ConstEntry.second << '\n');
OS.EmitIntValue(ConstEntry.second, 8);
}
}
@@ -492,7 +492,7 @@ void StackMaps::emitConstantPoolEntries(MCStreamer &OS) {
/// 0x4, Constant, Offset (small constant)
/// 0x5, ConstIndex, Constants[Offset] (large constant)
void StackMaps::emitCallsiteEntries(MCStreamer &OS) {
- DEBUG(print(dbgs()));
+ LLVM_DEBUG(print(dbgs()));
// Callsite entries.
for (const auto &CSI : CSInfos) {
const LocationVec &CSLocs = CSI.Locations;
@@ -569,7 +569,7 @@ void StackMaps::serializeToStackMapSection() {
OS.EmitLabel(OutContext.getOrCreateSymbol(Twine("__LLVM_StackMaps")));
// Serialize data.
- DEBUG(dbgs() << "********** Stack Map Output **********\n");
+ LLVM_DEBUG(dbgs() << "********** Stack Map Output **********\n");
emitStackmapHeader(OS);
emitFunctionFrameRecords(OS);
emitConstantPoolEntries(OS);
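
// Editorial sketch (not part of the patch): before emission, live-out entries are
// sorted by DWARF register number and duplicates are merged, keeping the largest spill
// size. The stand-alone code below reproduces that sort-then-merge shape with a local
// struct; it is illustrative, not the StackMaps data layout.
#include <algorithm>
#include <iostream>
#include <vector>

struct LiveOutReg {
  unsigned DwarfRegNum;
  unsigned Size; // bytes that need to be spilled
};

std::vector<LiveOutReg> mergeLiveOuts(std::vector<LiveOutReg> LiveOuts) {
  // Only the register number matters for ordering.
  std::sort(LiveOuts.begin(), LiveOuts.end(),
            [](const LiveOutReg &L, const LiveOutReg &R) {
              return L.DwarfRegNum < R.DwarfRegNum;
            });
  std::vector<LiveOutReg> Merged;
  for (const LiveOutReg &LO : LiveOuts) {
    if (!Merged.empty() && Merged.back().DwarfRegNum == LO.DwarfRegNum)
      Merged.back().Size = std::max(Merged.back().Size, LO.Size); // keep max size
    else
      Merged.push_back(LO);
  }
  return Merged;
}

int main() {
  for (const LiveOutReg &LO : mergeLiveOuts({{7, 4}, {3, 8}, {7, 8}}))
    std::cout << "reg " << LO.DwarfRegNum << " size " << LO.Size << '\n';
  return 0;
}
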
diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp
index 62cef95a4af2..cb12c7ce6e82 100644
--- a/contrib/llvm/lib/CodeGen/StackProtector.cpp
+++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp
@@ -36,6 +36,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
@@ -69,32 +70,6 @@ INITIALIZE_PASS_END(StackProtector, DEBUG_TYPE,
FunctionPass *llvm::createStackProtectorPass() { return new StackProtector(); }
-StackProtector::SSPLayoutKind
-StackProtector::getSSPLayout(const AllocaInst *AI) const {
- return AI ? Layout.lookup(AI) : SSPLK_None;
-}
-
-void StackProtector::adjustForColoring(const AllocaInst *From,
- const AllocaInst *To) {
- // When coloring replaces one alloca with another, transfer the SSPLayoutKind
- // tag from the remapped to the target alloca. The remapped alloca should
- // have a size smaller than or equal to the replacement alloca.
- SSPLayoutMap::iterator I = Layout.find(From);
- if (I != Layout.end()) {
- SSPLayoutKind Kind = I->second;
- Layout.erase(I);
-
- // Transfer the tag, but make sure that SSPLK_AddrOf does not overwrite
- // SSPLK_SmallArray or SSPLK_LargeArray, and make sure that
- // SSPLK_SmallArray does not overwrite SSPLK_LargeArray.
- I = Layout.find(To);
- if (I == Layout.end())
- Layout.insert(std::make_pair(To, Kind));
- else if (I->second != SSPLK_LargeArray && Kind != SSPLK_AddrOf)
- I->second = Kind;
- }
-}
-
void StackProtector::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetPassConfig>();
AU.addPreserved<DominatorTreeWrapperPass>();
@@ -182,6 +157,14 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge,
return NeedsProtector;
}
+static bool isLifetimeInst(const Instruction *I) {
+ if (const auto Intrinsic = dyn_cast<IntrinsicInst>(I)) {
+ const auto Id = Intrinsic->getIntrinsicID();
+ return Id == Intrinsic::lifetime_start || Id == Intrinsic::lifetime_end;
+ }
+ return false;
+}
+
bool StackProtector::HasAddressTaken(const Instruction *AI) {
for (const User *U : AI->users()) {
if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
@@ -190,8 +173,10 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) {
} else if (const PtrToIntInst *SI = dyn_cast<PtrToIntInst>(U)) {
if (AI == SI->getOperand(0))
return true;
- } else if (isa<CallInst>(U)) {
- return true;
+ } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
+ // Ignore intrinsics that are not calls. TODO: Use isLoweredToCall().
+ if (!isa<DbgInfoIntrinsic>(CI) && !isLifetimeInst(CI))
+ return true;
} else if (isa<InvokeInst>(U)) {
return true;
} else if (const SelectInst *SI = dyn_cast<SelectInst>(U)) {
@@ -214,7 +199,7 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) {
return false;
}
-/// \brief Check whether or not this function needs a stack protector based
+/// Check whether or not this function needs a stack protector based
/// upon the stack protector level.
///
/// We use two heuristics: a standard (ssp) and strong (sspstrong).
@@ -278,18 +263,21 @@ bool StackProtector::RequiresStackProtector() {
if (CI->getLimitedValue(SSPBufferSize) >= SSPBufferSize) {
// A call to alloca with size >= SSPBufferSize requires
// stack protectors.
- Layout.insert(std::make_pair(AI, SSPLK_LargeArray));
+ Layout.insert(std::make_pair(AI,
+ MachineFrameInfo::SSPLK_LargeArray));
ORE.emit(RemarkBuilder);
NeedsProtector = true;
} else if (Strong) {
// Require protectors for all alloca calls in strong mode.
- Layout.insert(std::make_pair(AI, SSPLK_SmallArray));
+ Layout.insert(std::make_pair(AI,
+ MachineFrameInfo::SSPLK_SmallArray));
ORE.emit(RemarkBuilder);
NeedsProtector = true;
}
} else {
// A call to alloca with a variable size requires protectors.
- Layout.insert(std::make_pair(AI, SSPLK_LargeArray));
+ Layout.insert(std::make_pair(AI,
+ MachineFrameInfo::SSPLK_LargeArray));
ORE.emit(RemarkBuilder);
NeedsProtector = true;
}
@@ -298,8 +286,9 @@ bool StackProtector::RequiresStackProtector() {
bool IsLarge = false;
if (ContainsProtectableArray(AI->getAllocatedType(), IsLarge, Strong)) {
- Layout.insert(std::make_pair(AI, IsLarge ? SSPLK_LargeArray
- : SSPLK_SmallArray));
+ Layout.insert(std::make_pair(AI, IsLarge
+ ? MachineFrameInfo::SSPLK_LargeArray
+ : MachineFrameInfo::SSPLK_SmallArray));
ORE.emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "StackProtectorBuffer", &I)
<< "Stack protection applied to function "
@@ -313,7 +302,7 @@ bool StackProtector::RequiresStackProtector() {
if (Strong && HasAddressTaken(AI)) {
++NumAddrTaken;
- Layout.insert(std::make_pair(AI, SSPLK_AddrOf));
+ Layout.insert(std::make_pair(AI, MachineFrameInfo::SSPLK_AddrOf));
ORE.emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "StackProtectorAddressTaken",
&I)
@@ -523,3 +512,23 @@ BasicBlock *StackProtector::CreateFailBB() {
bool StackProtector::shouldEmitSDCheck(const BasicBlock &BB) const {
return HasPrologue && !HasIRCheck && dyn_cast<ReturnInst>(BB.getTerminator());
}
+
+void StackProtector::copyToMachineFrameInfo(MachineFrameInfo &MFI) const {
+ if (Layout.empty())
+ return;
+
+ for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) {
+ if (MFI.isDeadObjectIndex(I))
+ continue;
+
+ const AllocaInst *AI = MFI.getObjectAllocation(I);
+ if (!AI)
+ continue;
+
+ SSPLayoutMap::const_iterator LI = Layout.find(AI);
+ if (LI == Layout.end())
+ continue;
+
+ MFI.setObjectSSPLayout(I, LI->second);
+ }
+}
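
// Editorial sketch (not part of the patch): rather than having StackColoring query the
// StackProtector pass, the protector now copies its per-alloca layout tags into the
// frame-index table once allocas have been assigned frame slots. Below is a
// stand-alone illustration of that handoff, with hypothetical maps standing in for
// MachineFrameInfo.
#include <iostream>
#include <map>
#include <string>

enum SSPLayout { LK_None, LK_AddrOf, LK_SmallArray, LK_LargeArray };

// Copy alloca-keyed tags onto their assigned frame indices.
void copyLayoutToFrameInfo(const std::map<std::string, SSPLayout> &Layout,
                           const std::map<int, std::string> &SlotToAlloca,
                           std::map<int, SSPLayout> &SlotLayout) {
  for (const auto &Slot : SlotToAlloca) {
    auto It = Layout.find(Slot.second);
    if (It != Layout.end())
      SlotLayout[Slot.first] = It->second; // untagged slots stay LK_None
  }
}

int main() {
  std::map<std::string, SSPLayout> Layout = {{"buf", LK_LargeArray}};
  std::map<int, std::string> SlotToAlloca = {{0, "buf"}, {1, "tmp"}};
  std::map<int, SSPLayout> SlotLayout;
  copyLayoutToFrameInfo(Layout, SlotToAlloca, SlotLayout);
  std::cout << "slot 0 kind " << SlotLayout[0] << '\n'; // 3 == LK_LargeArray
  return 0;
}
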
diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
index 8fc7a4a32842..eb15b15a24a6 100644
--- a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -82,14 +82,14 @@ namespace {
// AllColors - If index is set, it's a spill slot, i.e. color.
// FIXME: This assumes PEI locate spill slot with smaller indices
// closest to stack pointer / frame pointer. Therefore, smaller
- // index == better color.
- BitVector AllColors;
+ // index == better color. This is per stack ID.
+ SmallVector<BitVector, 2> AllColors;
- // NextColor - Next "color" that's not yet used.
- int NextColor = -1;
+ // NextColor - Next "color" that's not yet used. This is per stack ID.
+ SmallVector<int, 2> NextColors = { -1 };
- // UsedColors - "Colors" that have been assigned.
- BitVector UsedColors;
+ // UsedColors - "Colors" that have been assigned. This is per stack ID
+ SmallVector<BitVector, 2> UsedColors;
// Assignments - Color to intervals mapping.
SmallVector<SmallVector<LiveInterval*,4>, 16> Assignments;
@@ -196,10 +196,15 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) {
/// to a sorted (by weight) list.
void StackSlotColoring::InitializeSlots() {
int LastFI = MFI->getObjectIndexEnd();
+
+ // There is always at least one stack ID.
+ AllColors.resize(1);
+ UsedColors.resize(1);
+
OrigAlignments.resize(LastFI);
OrigSizes.resize(LastFI);
- AllColors.resize(LastFI);
- UsedColors.resize(LastFI);
+ AllColors[0].resize(LastFI);
+ UsedColors[0].resize(LastFI);
Assignments.resize(LastFI);
using Pair = std::iterator_traits<LiveStacks::iterator>::value_type;
@@ -209,29 +214,42 @@ void StackSlotColoring::InitializeSlots() {
Intervals.reserve(LS->getNumIntervals());
for (auto &I : *LS)
Intervals.push_back(&I);
- std::sort(Intervals.begin(), Intervals.end(),
- [](Pair *LHS, Pair *RHS) { return LHS->first < RHS->first; });
+ llvm::sort(Intervals.begin(), Intervals.end(),
+ [](Pair *LHS, Pair *RHS) { return LHS->first < RHS->first; });
// Gather all spill slots into a list.
- DEBUG(dbgs() << "Spill slot intervals:\n");
+ LLVM_DEBUG(dbgs() << "Spill slot intervals:\n");
for (auto *I : Intervals) {
LiveInterval &li = I->second;
- DEBUG(li.dump());
+ LLVM_DEBUG(li.dump());
int FI = TargetRegisterInfo::stackSlot2Index(li.reg);
if (MFI->isDeadObjectIndex(FI))
continue;
+
SSIntervals.push_back(&li);
OrigAlignments[FI] = MFI->getObjectAlignment(FI);
OrigSizes[FI] = MFI->getObjectSize(FI);
- AllColors.set(FI);
+
+ auto StackID = MFI->getStackID(FI);
+ if (StackID != 0) {
+ AllColors.resize(StackID + 1);
+ UsedColors.resize(StackID + 1);
+ AllColors[StackID].resize(LastFI);
+ UsedColors[StackID].resize(LastFI);
+ }
+
+ AllColors[StackID].set(FI);
}
- DEBUG(dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << '\n');
// Sort them by weight.
std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
+ NextColors.resize(AllColors.size());
+
// Get first "color".
- NextColor = AllColors.find_first();
+ for (unsigned I = 0, E = AllColors.size(); I != E; ++I)
+ NextColors[I] = AllColors[I].find_first();
}
/// OverlapWithAssignments - Return true if LiveInterval overlaps with any
@@ -252,37 +270,41 @@ int StackSlotColoring::ColorSlot(LiveInterval *li) {
int Color = -1;
bool Share = false;
int FI = TargetRegisterInfo::stackSlot2Index(li->reg);
+ uint8_t StackID = MFI->getStackID(FI);
if (!DisableSharing) {
+
// Check if it's possible to reuse any of the used colors.
- Color = UsedColors.find_first();
+ Color = UsedColors[StackID].find_first();
while (Color != -1) {
if (!OverlapWithAssignments(li, Color)) {
Share = true;
++NumEliminated;
break;
}
- Color = UsedColors.find_next(Color);
+ Color = UsedColors[StackID].find_next(Color);
}
}
if (Color != -1 && MFI->getStackID(Color) != MFI->getStackID(FI)) {
- DEBUG(dbgs() << "cannot share FIs with different stack IDs\n");
+ LLVM_DEBUG(dbgs() << "cannot share FIs with different stack IDs\n");
Share = false;
}
// Assign it to the first available color (assumed to be the best) if it's
// not possible to share a used color with other objects.
if (!Share) {
- assert(NextColor != -1 && "No more spill slots?");
- Color = NextColor;
- UsedColors.set(Color);
- NextColor = AllColors.find_next(NextColor);
+ assert(NextColors[StackID] != -1 && "No more spill slots?");
+ Color = NextColors[StackID];
+ UsedColors[StackID].set(Color);
+ NextColors[StackID] = AllColors[StackID].find_next(NextColors[StackID]);
}
+ assert(MFI->getStackID(Color) == MFI->getStackID(FI));
+
// Record the assignment.
Assignments[Color].push_back(li);
- DEBUG(dbgs() << "Assigning fi#" << FI << " to fi#" << Color << "\n");
+ LLVM_DEBUG(dbgs() << "Assigning fi#" << FI << " to fi#" << Color << "\n");
// Change size and alignment of the allocated slot. If there are multiple
// objects sharing the same slot, then make sure the size and alignment
@@ -305,7 +327,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
SmallVector<SmallVector<int, 4>, 16> RevMap(NumObjs);
BitVector UsedColors(NumObjs);
- DEBUG(dbgs() << "Color spill slot intervals:\n");
+ LLVM_DEBUG(dbgs() << "Color spill slot intervals:\n");
bool Changed = false;
for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
LiveInterval *li = SSIntervals[i];
@@ -319,7 +341,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
Changed |= (SS != NewSS);
}
- DEBUG(dbgs() << "\nSpill slots after coloring:\n");
+ LLVM_DEBUG(dbgs() << "\nSpill slots after coloring:\n");
for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
LiveInterval *li = SSIntervals[i];
int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
@@ -330,8 +352,8 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
#ifndef NDEBUG
for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i)
- DEBUG(SSIntervals[i]->dump());
- DEBUG(dbgs() << '\n');
+ LLVM_DEBUG(SSIntervals[i]->dump());
+ LLVM_DEBUG(dbgs() << '\n');
#endif
if (!Changed)
@@ -357,10 +379,13 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
}
// Delete unused stack slots.
- while (NextColor != -1) {
- DEBUG(dbgs() << "Removing unused stack object fi#" << NextColor << "\n");
- MFI->RemoveStackObject(NextColor);
- NextColor = AllColors.find_next(NextColor);
+ for (int StackID = 0, E = AllColors.size(); StackID != E; ++StackID) {
+ int NextColor = NextColors[StackID];
+ while (NextColor != -1) {
+ LLVM_DEBUG(dbgs() << "Removing unused stack object fi#" << NextColor << "\n");
+ MFI->RemoveStackObject(NextColor);
+ NextColor = AllColors[StackID].find_next(NextColor);
+ }
}
return true;
@@ -382,6 +407,8 @@ void StackSlotColoring::RewriteInstruction(MachineInstr &MI,
int NewFI = SlotMapping[OldFI];
if (NewFI == -1 || NewFI == OldFI)
continue;
+
+ assert(MFI->getStackID(OldFI) == MFI->getStackID(NewFI));
MO.setIndex(NewFI);
}
@@ -418,17 +445,21 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
unsigned LoadReg = 0;
unsigned StoreReg = 0;
- if (!(LoadReg = TII->isLoadFromStackSlot(*I, FirstSS)))
+ unsigned LoadSize = 0;
+ unsigned StoreSize = 0;
+ if (!(LoadReg = TII->isLoadFromStackSlot(*I, FirstSS, LoadSize)))
continue;
// Skip the ...pseudo debugging... instructions between a load and store.
- while ((NextMI != E) && NextMI->isDebugValue()) {
+ while ((NextMI != E) && NextMI->isDebugInstr()) {
++NextMI;
++I;
}
if (NextMI == E) continue;
- if (!(StoreReg = TII->isStoreToStackSlot(*NextMI, SecondSS)))
+ if (!(StoreReg = TII->isStoreToStackSlot(*NextMI, SecondSS, StoreSize)))
+ continue;
+ if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1 ||
+ LoadSize != StoreSize)
continue;
- if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1) continue;
++NumDead;
changed = true;
@@ -450,10 +481,13 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
}
bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
- DEBUG({
- dbgs() << "********** Stack Slot Coloring **********\n"
- << "********** Function: " << MF.getName() << '\n';
- });
+ LLVM_DEBUG({
+ dbgs() << "********** Stack Slot Coloring **********\n"
+ << "********** Function: " << MF.getName() << '\n';
+ });
+
+ if (skipFunction(MF.getFunction()))
+ return false;
MFI = &MF.getFrameInfo();
TII = MF.getSubtarget().getInstrInfo();
@@ -479,7 +513,9 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
InitializeSlots();
Changed = ColorSlots(MF);
- NextColor = -1;
+ for (int &Next : NextColors)
+ Next = -1;
+
SSIntervals.clear();
for (unsigned i = 0, e = SSRefs.size(); i != e; ++i)
SSRefs[i].clear();
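
For readers skimming the StackSlotColoring hunks above, here is a minimal standalone sketch of the per-stack-ID bookkeeping they introduce; SlotColoring, addSlot and takeColor are hypothetical names that only mirror the AllColors/NextColors vectors in the pass, using std::vector in place of BitVector.

#include <cassert>
#include <vector>

// Simplified analogue of the per-StackID coloring state: every stack ID
// gets its own pool of candidate colors, so a slot is only ever recolored
// to another slot with the same stack ID.
struct SlotColoring {
  // AllColors[ID][FI] is true when frame index FI belongs to stack ID `ID`.
  std::vector<std::vector<bool>> AllColors;
  // NextColor[ID] is the first unassigned color for that stack ID (-1: none).
  std::vector<int> NextColor;

  void addSlot(unsigned StackID, unsigned FI) {
    if (StackID >= AllColors.size()) {
      AllColors.resize(StackID + 1);
      NextColor.resize(StackID + 1, -1);
    }
    if (FI >= AllColors[StackID].size())
      AllColors[StackID].resize(FI + 1, false);
    AllColors[StackID][FI] = true;
    if (NextColor[StackID] == -1 || unsigned(NextColor[StackID]) > FI)
      NextColor[StackID] = int(FI);
  }

  // Hand out the next free color for this stack ID and advance the cursor.
  int takeColor(unsigned StackID) {
    int Color = NextColor[StackID];
    assert(Color != -1 && "no spill slot left for this stack ID");
    const auto &Pool = AllColors[StackID];
    unsigned I = unsigned(Color) + 1;
    while (I < Pool.size() && !Pool[I])
      ++I;
    NextColor[StackID] = (I < Pool.size()) ? int(I) : -1;
    return Color;
  }
};
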
diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
index df1eebf43b2b..25cd7802264e 100644
--- a/contrib/llvm/lib/CodeGen/TailDuplication.cpp
+++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
@@ -7,8 +7,9 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass duplicates basic blocks ending in unconditional branches into
-// the tails of their predecessors, using the TailDuplicator utility class.
+/// \file This pass duplicates basic blocks ending in unconditional branches
+/// into the tails of their predecessors, using the TailDuplicator utility
+/// class.
//
//===----------------------------------------------------------------------===//
@@ -26,38 +27,55 @@ using namespace llvm;
namespace {
-/// Perform tail duplication. Delegates to TailDuplicator
-class TailDuplicatePass : public MachineFunctionPass {
+class TailDuplicateBase : public MachineFunctionPass {
TailDuplicator Duplicator;
-
+ bool PreRegAlloc;
public:
- static char ID;
-
- explicit TailDuplicatePass() : MachineFunctionPass(ID) {}
+ TailDuplicateBase(char &PassID, bool PreRegAlloc)
+ : MachineFunctionPass(PassID), PreRegAlloc(PreRegAlloc) {}
bool runOnMachineFunction(MachineFunction &MF) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+class TailDuplicate : public TailDuplicateBase {
+public:
+ static char ID;
+ TailDuplicate() : TailDuplicateBase(ID, false) {
+ initializeTailDuplicatePass(*PassRegistry::getPassRegistry());
+ }
+};
+
+class EarlyTailDuplicate : public TailDuplicateBase {
+public:
+ static char ID;
+ EarlyTailDuplicate() : TailDuplicateBase(ID, true) {
+ initializeEarlyTailDuplicatePass(*PassRegistry::getPassRegistry());
+ }
};
} // end anonymous namespace
-char TailDuplicatePass::ID = 0;
+char TailDuplicate::ID;
+char EarlyTailDuplicate::ID;
-char &llvm::TailDuplicateID = TailDuplicatePass::ID;
+char &llvm::TailDuplicateID = TailDuplicate::ID;
+char &llvm::EarlyTailDuplicateID = EarlyTailDuplicate::ID;
-INITIALIZE_PASS(TailDuplicatePass, DEBUG_TYPE, "Tail Duplication", false, false)
+INITIALIZE_PASS(TailDuplicate, DEBUG_TYPE, "Tail Duplication", false, false)
+INITIALIZE_PASS(EarlyTailDuplicate, "early-tailduplication",
+ "Early Tail Duplication", false, false)
-bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) {
+bool TailDuplicateBase::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
auto MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
-
- // TODO: Querying isSSA() to determine pre-/post-regalloc is fragile, better
- // split this into two passes instead.
- bool PreRegAlloc = MF.getRegInfo().isSSA();
- Duplicator.initMF(MF, PreRegAlloc, MBPI, /* LayoutMode */ false);
+ Duplicator.initMF(MF, PreRegAlloc, MBPI, /*LayoutMode=*/false);
bool MadeChange = false;
while (Duplicator.tailDuplicateBlocks())
@@ -65,8 +83,3 @@ bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) {
return MadeChange;
}
-
-void TailDuplicatePass::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<MachineBranchProbabilityInfo>();
- MachineFunctionPass::getAnalysisUsage(AU);
-}
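
As a rough illustration of the pass split above (not the actual LLVM pass machinery), the same shape can be written as a shared base class that bakes the pre-/post-RA decision in at construction time; TailDupBase, EarlyTailDup and LateTailDup are invented names.

#include <iostream>

// The shared logic lives in a base class that is told, at construction
// time, whether it runs before or after register allocation; two thin
// subclasses pick the flag instead of the old "query isSSA() at run time"
// heuristic.
class TailDupBase {
public:
  explicit TailDupBase(bool PreRegAlloc) : PreRegAlloc(PreRegAlloc) {}
  bool run() {
    std::cout << (PreRegAlloc ? "early" : "late") << " tail duplication\n";
    return false; // would report whether the CFG changed
  }
private:
  const bool PreRegAlloc;
};

struct EarlyTailDup : TailDupBase { EarlyTailDup() : TailDupBase(true) {} };
struct LateTailDup  : TailDupBase { LateTailDup()  : TailDupBase(false) {} };
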
diff --git a/contrib/llvm/lib/CodeGen/TailDuplicator.cpp b/contrib/llvm/lib/CodeGen/TailDuplicator.cpp
index f51c884839b3..b118c176a897 100644
--- a/contrib/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/contrib/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -37,6 +37,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <iterator>
@@ -261,7 +262,7 @@ bool TailDuplicator::tailDuplicateBlocks() {
bool MadeChange = false;
if (PreRegAlloc && TailDupVerify) {
- DEBUG(dbgs() << "\n*** Before tail-duplicating\n");
+ LLVM_DEBUG(dbgs() << "\n*** Before tail-duplicating\n");
VerifyPHIs(*MF, true);
}
@@ -371,6 +372,13 @@ void TailDuplicator::duplicateInstruction(
MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB,
DenseMap<unsigned, RegSubRegPair> &LocalVRMap,
const DenseSet<unsigned> &UsedByPhi) {
+ // Allow duplication of CFI instructions.
+ if (MI->isCFIInstruction()) {
+ BuildMI(*PredBB, PredBB->end(), PredBB->findDebugLoc(PredBB->begin()),
+ TII->get(TargetOpcode::CFI_INSTRUCTION)).addCFIIndex(
+ MI->getOperand(0).getCFIIndex());
+ return;
+ }
MachineInstr &NewMI = TII->duplicate(*PredBB, PredBB->end(), *MI);
if (PreRegAlloc) {
for (unsigned i = 0, e = NewMI.getNumOperands(); i != e; ++i) {
@@ -585,7 +593,13 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
unsigned InstrCount = 0;
for (MachineInstr &MI : TailBB) {
// Non-duplicable things shouldn't be tail-duplicated.
- if (MI.isNotDuplicable())
+ // CFI instructions are marked as non-duplicable, because Darwin compact
+ // unwind info emission can't handle multiple prologue setups. In case of
+ // DWARF, allow them to be duplicated, so that their existence doesn't prevent
+ // tail duplication of basic blocks that would otherwise be duplicated.
+ if (MI.isNotDuplicable() &&
+ (TailBB.getParent()->getTarget().getTargetTriple().isOSDarwin() ||
+ !MI.isCFIInstruction()))
return false;
// Convergent instructions can be duplicated only if doing so doesn't add
@@ -605,7 +619,7 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
if (PreRegAlloc && MI.isCall())
return false;
- if (!MI.isPHI() && !MI.isDebugValue())
+ if (!MI.isPHI() && !MI.isMetaInstruction())
InstrCount += 1;
if (InstrCount > MaxDuplicateCount)
@@ -704,8 +718,8 @@ bool TailDuplicator::duplicateSimpleBB(
continue;
Changed = true;
- DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
- << "From simple Succ: " << *TailBB);
+ LLVM_DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
+ << "From simple Succ: " << *TailBB);
MachineBasicBlock *NewTarget = *TailBB->succ_begin();
MachineBasicBlock *NextBB = PredBB->getNextNode();
@@ -785,8 +799,8 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
MachineBasicBlock *ForcedLayoutPred,
SmallVectorImpl<MachineBasicBlock *> &TDBBs,
SmallVectorImpl<MachineInstr *> &Copies) {
- DEBUG(dbgs() << "\n*** Tail-duplicating " << printMBBReference(*TailBB)
- << '\n');
+ LLVM_DEBUG(dbgs() << "\n*** Tail-duplicating " << printMBBReference(*TailBB)
+ << '\n');
DenseSet<unsigned> UsedByPhi;
getRegsUsedByPHIs(*TailBB, &UsedByPhi);
@@ -816,8 +830,8 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
if (IsLayoutSuccessor)
continue;
- DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
- << "From Succ: " << *TailBB);
+ LLVM_DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
+ << "From Succ: " << *TailBB);
TDBBs.push_back(PredBB);
@@ -879,8 +893,8 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
(!PriorTBB || PriorTBB == TailBB) &&
TailBB->pred_size() == 1 &&
!TailBB->hasAddressTaken()) {
- DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
- << "From MBB: " << *TailBB);
+ LLVM_DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
+ << "From MBB: " << *TailBB);
// There may be a branch to the layout successor. This is unlikely but it
// happens. The correct thing to do is to remove the branch before
// duplicating the instructions in all cases.
@@ -985,7 +999,7 @@ void TailDuplicator::removeDeadBlock(
MachineBasicBlock *MBB,
function_ref<void(MachineBasicBlock *)> *RemovalCallback) {
assert(MBB->pred_empty() && "MBB must be dead!");
- DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
+ LLVM_DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
if (RemovalCallback)
(*RemovalCallback)(MBB);
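
The shouldTailDuplicate() change above boils down to one predicate; the following framework-free restatement (blocksDuplication is an invented name) shows when a non-duplicable instruction still blocks duplication.

// A marked-non-duplicable instruction is rejected as before, except that a
// CFI instruction is tolerated when the target is not Darwin, because only
// Darwin compact unwind info cannot cope with duplicated prologue setups.
bool blocksDuplication(bool IsNotDuplicable, bool IsCFI, bool IsDarwin) {
  return IsNotDuplicable && (IsDarwin || !IsCFI);
}

The related size-counting tweak is analogous: instructions that are pure metadata (the broader isMetaInstruction check, rather than just DBG_VALUE) no longer count toward the duplication size limit.
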
diff --git a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
index b2151eb49655..f0cfa2fbe4fd 100644
--- a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -36,6 +36,13 @@ bool TargetFrameLowering::noFramePointerElim(const MachineFunction &MF) const {
return Attr.getValueAsString() == "true";
}
+bool TargetFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const {
+ assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
+ MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
+ !MF.getFunction().hasFnAttribute(Attribute::UWTable));
+ return false;
+}
+
/// Returns the displacement from the frame register to the stack
/// frame of the specified index, along with the frame register used
/// (in output arg FrameReg). This is the default implementation which
@@ -85,6 +92,19 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF,
if (MF.getFunction().hasFnAttribute(Attribute::Naked))
return;
+ // Noreturn+nounwind functions never restore CSR, so no saves are needed.
+ // Purely noreturn functions may still return through throws, so those must
+ // save CSR for caller exception handlers.
+ //
+ // If the function uses longjmp to break out of its current path of
+ // execution we do not need the CSR spills either: setjmp stores all CSRs
+ // it was called with into the jmp_buf, which longjmp then restores.
+ if (MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
+ MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
+ !MF.getFunction().hasFnAttribute(Attribute::UWTable) &&
+ enableCalleeSaveSkip(MF))
+ return;
+
// Functions which call __builtin_unwind_init get all their registers saved.
bool CallsUnwindInit = MF.callsUnwindInit();
const MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -100,7 +120,16 @@ unsigned TargetFrameLowering::getStackAlignmentSkew(
// When HHVM function is called, the stack is skewed as the return address
// is removed from the stack before we enter the function.
if (LLVM_UNLIKELY(MF.getFunction().getCallingConv() == CallingConv::HHVM))
- return MF.getTarget().getPointerSize();
+ return MF.getTarget().getAllocaPointerSize();
return 0;
}
+
+int TargetFrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
+ llvm_unreachable("getInitialCFAOffset() not implemented!");
+}
+
+unsigned TargetFrameLowering::getInitialCFARegister(const MachineFunction &MF)
+ const {
+ llvm_unreachable("getInitialCFARegister() not implemented!");
+}
\ No newline at end of file
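
The determineCalleeSaves() early-return above can be summarized by a small predicate; skipCalleeSaves below is a hypothetical restatement of that condition, not LLVM API.

// Callee-saved-register spills are only omitted when the function can
// neither return normally nor unwind, no unwind table is requested, and the
// target has opted in via enableCalleeSaveSkip().
bool skipCalleeSaves(bool NoReturn, bool NoUnwind, bool UWTable,
                     bool TargetOptIn) {
  return NoReturn && NoUnwind && !UWTable && TargetOptIn;
}
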
diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
index bd90ed5b55b8..963f8178b509 100644
--- a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -174,6 +174,14 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI,
bool Reg2IsUndef = MI.getOperand(Idx2).isUndef();
bool Reg1IsInternal = MI.getOperand(Idx1).isInternalRead();
bool Reg2IsInternal = MI.getOperand(Idx2).isInternalRead();
+ // Avoid calling isRenamable for virtual registers since we assert that
+ // the renamable property is only queried/set for physical registers.
+ bool Reg1IsRenamable = TargetRegisterInfo::isPhysicalRegister(Reg1)
+ ? MI.getOperand(Idx1).isRenamable()
+ : false;
+ bool Reg2IsRenamable = TargetRegisterInfo::isPhysicalRegister(Reg2)
+ ? MI.getOperand(Idx2).isRenamable()
+ : false;
// If destination is tied to either of the commuted source register, then
// it must be updated.
if (HasDef && Reg0 == Reg1 &&
@@ -211,6 +219,12 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI,
CommutedMI->getOperand(Idx1).setIsUndef(Reg2IsUndef);
CommutedMI->getOperand(Idx2).setIsInternalRead(Reg1IsInternal);
CommutedMI->getOperand(Idx1).setIsInternalRead(Reg2IsInternal);
+ // Avoid calling setIsRenamable for virtual registers since we assert that
+ // the renamable property is only queried/set for physical registers.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg1))
+ CommutedMI->getOperand(Idx2).setIsRenamable(Reg1IsRenamable);
+ if (TargetRegisterInfo::isPhysicalRegister(Reg2))
+ CommutedMI->getOperand(Idx1).setIsRenamable(Reg2IsRenamable);
return CommutedMI;
}
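
A simplified model of the commuteInstructionImpl() change above, with an invented Operand struct standing in for MachineOperand: the renamable bit is captured before the swap and re-applied only where a physical register lands.

#include <utility>

struct Operand {
  unsigned Reg = 0;
  bool IsPhysical = false;
  bool Renamable = false;
};

void commuteOperands(Operand &Op1, Operand &Op2) {
  // Snapshot the flags before the registers move; virtual registers never
  // carry the flag, so it must not be read from (or written to) them.
  bool Reg1Renamable = Op1.IsPhysical && Op1.Renamable;
  bool Reg2Renamable = Op2.IsPhysical && Op2.Renamable;
  std::swap(Op1.Reg, Op2.Reg);
  std::swap(Op1.IsPhysical, Op2.IsPhysical);
  // Reg1 now sits in Op2 and Reg2 in Op1; restore each flag only at a
  // position that now holds a physical register.
  Op2.Renamable = Op2.IsPhysical ? Reg1Renamable : false;
  Op1.Renamable = Op1.IsPhysical ? Reg2Renamable : false;
}
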
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
index b29a33ac1c14..43f4bad595e3 100644
--- a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -28,7 +28,6 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -50,6 +49,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
@@ -118,7 +118,7 @@ static cl::opt<int> MinPercentageForPredictableBranch(
void TargetLoweringBase::InitLibcalls(const Triple &TT) {
#define HANDLE_LIBCALL(code, name) \
setLibcallName(RTLIB::code, name);
-#include "llvm/CodeGen/RuntimeLibcalls.def"
+#include "llvm/IR/RuntimeLibcalls.def"
#undef HANDLE_LIBCALL
// Initialize calling conventions to their default.
for (int LC = 0; LC < RTLIB::UNKNOWN_LIBCALL; ++LC)
@@ -192,6 +192,9 @@ RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
return FPEXT_F64_F128;
else if (RetVT == MVT::ppcf128)
return FPEXT_F64_PPCF128;
+ } else if (OpVT == MVT::f80) {
+ if (RetVT == MVT::f128)
+ return FPEXT_F80_F128;
}
return UNKNOWN_LIBCALL;
@@ -227,6 +230,9 @@ RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) {
return FPROUND_F128_F64;
if (OpVT == MVT::ppcf128)
return FPROUND_PPCF128_F64;
+ } else if (RetVT == MVT::f80) {
+ if (OpVT == MVT::f128)
+ return FPROUND_F128_F80;
}
return UNKNOWN_LIBCALL;
@@ -529,6 +535,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
// Perform these initializations only once.
MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove =
MaxLoadsPerMemcmp = 8;
+ MaxGluedStoresPerMemcpy = 0;
MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize =
MaxStoresPerMemmoveOptSize = MaxLoadsPerMemcmpOptSize = 4;
UseUnderscoreSetJmp = false;
@@ -614,6 +621,12 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::SUBCARRY, VT, Expand);
setOperationAction(ISD::SETCCCARRY, VT, Expand);
+ // ADDC/ADDE/SUBC/SUBE default to expand.
+ setOperationAction(ISD::ADDC, VT, Expand);
+ setOperationAction(ISD::ADDE, VT, Expand);
+ setOperationAction(ISD::SUBC, VT, Expand);
+ setOperationAction(ISD::SUBE, VT, Expand);
+
// These default to Expand so they will be expanded to CTLZ/CTTZ by default.
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
@@ -679,12 +692,13 @@ MVT TargetLoweringBase::getScalarShiftAmountTy(const DataLayout &DL,
return MVT::getIntegerVT(8 * DL.getPointerSize(0));
}
-EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy,
- const DataLayout &DL) const {
+EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy, const DataLayout &DL,
+ bool LegalTypes) const {
assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
if (LHSTy.isVector())
return LHSTy;
- return getScalarShiftAmountTy(DL, LHSTy);
+ return LegalTypes ? getScalarShiftAmountTy(DL, LHSTy)
+ : getPointerTy(DL);
}
bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const {
@@ -979,6 +993,36 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
return MBB;
}
+MachineBasicBlock *
+TargetLoweringBase::emitXRayCustomEvent(MachineInstr &MI,
+ MachineBasicBlock *MBB) const {
+ assert(MI.getOpcode() == TargetOpcode::PATCHABLE_EVENT_CALL &&
+ "Called emitXRayCustomEvent on the wrong MI!");
+ auto &MF = *MI.getMF();
+ auto MIB = BuildMI(MF, MI.getDebugLoc(), MI.getDesc());
+ for (unsigned OpIdx = 0; OpIdx != MI.getNumOperands(); ++OpIdx)
+ MIB.add(MI.getOperand(OpIdx));
+
+ MBB->insert(MachineBasicBlock::iterator(MI), MIB);
+ MI.eraseFromParent();
+ return MBB;
+}
+
+MachineBasicBlock *
+TargetLoweringBase::emitXRayTypedEvent(MachineInstr &MI,
+ MachineBasicBlock *MBB) const {
+ assert(MI.getOpcode() == TargetOpcode::PATCHABLE_TYPED_EVENT_CALL &&
+ "Called emitXRayTypedEvent on the wrong MI!");
+ auto &MF = *MI.getMF();
+ auto MIB = BuildMI(MF, MI.getDebugLoc(), MI.getDesc());
+ for (unsigned OpIdx = 0; OpIdx != MI.getNumOperands(); ++OpIdx)
+ MIB.add(MI.getOperand(OpIdx));
+
+ MBB->insert(MachineBasicBlock::iterator(MI), MIB);
+ MI.eraseFromParent();
+ return MBB;
+}
+
/// findRepresentativeClass - Return the largest legal super-reg register class
/// of the register class for the specified type and its associated "cost".
// This function is in TargetLowering because it uses RegClassForVT which would
@@ -1587,13 +1631,16 @@ Value *TargetLoweringBase::getIRStackGuard(IRBuilder<> &IRB) const {
// Currently only support "standard" __stack_chk_guard.
// TODO: add LOAD_STACK_GUARD support.
void TargetLoweringBase::insertSSPDeclarations(Module &M) const {
- M.getOrInsertGlobal("__stack_chk_guard", Type::getInt8PtrTy(M.getContext()));
+ if (!M.getNamedValue("__stack_chk_guard"))
+ new GlobalVariable(M, Type::getInt8PtrTy(M.getContext()), false,
+ GlobalVariable::ExternalLinkage,
+ nullptr, "__stack_chk_guard");
}
// Currently only support "standard" __stack_chk_guard.
// TODO: add LOAD_STACK_GUARD support.
Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const {
- return M.getGlobalVariable("__stack_chk_guard", true);
+ return M.getNamedValue("__stack_chk_guard");
}
Value *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const {
@@ -1683,7 +1730,7 @@ static int getOpEnabled(bool IsSqrt, EVT VT, StringRef Override) {
return TargetLoweringBase::ReciprocalEstimate::Unspecified;
SmallVector<StringRef, 4> OverrideVector;
- SplitString(Override, OverrideVector, ",");
+ Override.split(OverrideVector, ',');
unsigned NumArgs = OverrideVector.size();
// Check if "all", "none", or "default" was specified.
@@ -1743,7 +1790,7 @@ static int getOpRefinementSteps(bool IsSqrt, EVT VT, StringRef Override) {
return TargetLoweringBase::ReciprocalEstimate::Unspecified;
SmallVector<StringRef, 4> OverrideVector;
- SplitString(Override, OverrideVector, ",");
+ Override.split(OverrideVector, ',');
unsigned NumArgs = OverrideVector.size();
// Check if "all", "default", or "none" was specified.
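
The __stack_chk_guard hunks above follow a get-or-create-by-name pattern; here is a deliberately simplified stand-in (a std::map instead of the real symbol table, and an invented insertGuard helper) that shows the shape of the change.

#include <map>
#include <string>

// The guard symbol is only created when no value of that name exists yet,
// and lookups go through the generic named-value table so a guard that is
// not a plain GlobalVariable (e.g. an alias) is still found.
struct Module {
  std::map<std::string, int> NamedValues; // name -> opaque "value" id
  int *getNamedValue(const std::string &N) {
    auto It = NamedValues.find(N);
    return It == NamedValues.end() ? nullptr : &It->second;
  }
};

void insertGuard(Module &M) {
  if (!M.getNamedValue("__stack_chk_guard"))
    M.NamedValues["__stack_chk_guard"] = 0; // "declare" the external guard
}
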
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 24d4baa31e1f..b5dd2d4cca89 100644
--- a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -91,23 +91,86 @@ static void GetObjCImageInfo(Module &M, unsigned &Version, unsigned &Flags,
// ELF
//===----------------------------------------------------------------------===//
-void TargetLoweringObjectFileELF::emitModuleMetadata(
- MCStreamer &Streamer, Module &M, const TargetMachine &TM) const {
+void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
+ const TargetMachine &TgtM) {
+ TargetLoweringObjectFile::Initialize(Ctx, TgtM);
+ TM = &TgtM;
+}
+
+void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer,
+ Module &M) const {
+ auto &C = getContext();
+
+ if (NamedMDNode *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) {
+ auto *S = C.getELFSection(".linker-options", ELF::SHT_LLVM_LINKER_OPTIONS,
+ ELF::SHF_EXCLUDE);
+
+ Streamer.SwitchSection(S);
+
+ for (const auto &Operand : LinkerOptions->operands()) {
+ if (cast<MDNode>(Operand)->getNumOperands() != 2)
+ report_fatal_error("invalid llvm.linker.options");
+ for (const auto &Option : cast<MDNode>(Operand)->operands()) {
+ Streamer.EmitBytes(cast<MDString>(Option)->getString());
+ Streamer.EmitIntValue(0, 1);
+ }
+ }
+ }
+
unsigned Version = 0;
unsigned Flags = 0;
StringRef Section;
GetObjCImageInfo(M, Version, Flags, Section);
- if (Section.empty())
+ if (!Section.empty()) {
+ auto *S = C.getELFSection(Section, ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
+ Streamer.SwitchSection(S);
+ Streamer.EmitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO")));
+ Streamer.EmitIntValue(Version, 4);
+ Streamer.EmitIntValue(Flags, 4);
+ Streamer.AddBlankLine();
+ }
+
+ SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
+ M.getModuleFlagsMetadata(ModuleFlags);
+
+ MDNode *CFGProfile = nullptr;
+
+ for (const auto &MFE : ModuleFlags) {
+ StringRef Key = MFE.Key->getString();
+ if (Key == "CG Profile") {
+ CFGProfile = cast<MDNode>(MFE.Val);
+ break;
+ }
+ }
+
+ if (!CFGProfile)
return;
- auto &C = getContext();
- auto *S = C.getELFSection(Section, ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
- Streamer.SwitchSection(S);
- Streamer.EmitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO")));
- Streamer.EmitIntValue(Version, 4);
- Streamer.EmitIntValue(Flags, 4);
- Streamer.AddBlankLine();
+ auto GetSym = [this](const MDOperand &MDO) -> MCSymbol * {
+ if (!MDO)
+ return nullptr;
+ auto V = cast<ValueAsMetadata>(MDO);
+ const Function *F = cast<Function>(V->getValue());
+ return TM->getSymbol(F);
+ };
+
+ for (const auto &Edge : CFGProfile->operands()) {
+ MDNode *E = cast<MDNode>(Edge);
+ const MCSymbol *From = GetSym(E->getOperand(0));
+ const MCSymbol *To = GetSym(E->getOperand(1));
+ // Skip null functions. This can happen if functions are dead stripped after
+ // the CGProfile pass has been run.
+ if (!From || !To)
+ continue;
+ uint64_t Count = cast<ConstantAsMetadata>(E->getOperand(2))
+ ->getValue()
+ ->getUniqueInteger()
+ .getZExtValue();
+ Streamer.emitCGProfileEntry(
+ MCSymbolRefExpr::create(From, MCSymbolRefExpr::VK_None, C),
+ MCSymbolRefExpr::create(To, MCSymbolRefExpr::VK_None, C), Count);
+ }
}
MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol(
@@ -170,7 +233,7 @@ const MCExpr *TargetLoweringObjectFileELF::getTTypeGlobalReference(
}
static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) {
- // N.B.: The defaults used in here are no the same ones used in MC.
+ // N.B.: The defaults used in here are not the same ones used in MC.
// We follow gcc, MC follows gas. For example, given ".section .eh_frame",
// both gas and MC will produce a section with no flags. Given
// section(".eh_frame") gcc will produce:
@@ -183,7 +246,7 @@ static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) {
if (Name.empty() || Name[0] != '.') return K;
- // Some lame default implementation based on some magic section names.
+ // Default implementation based on some magic section names.
if (Name == ".bss" ||
Name.startswith(".bss.") ||
Name.startswith(".gnu.linkonce.b.") ||
@@ -335,7 +398,8 @@ MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
/*EntrySize=*/0, Group, UniqueID, AssociatedSymbol);
// Make sure that we did not get some other section with incompatible sh_link.
// This should not be possible due to UniqueID code above.
- assert(Section->getAssociatedSymbol() == AssociatedSymbol);
+ assert(Section->getAssociatedSymbol() == AssociatedSymbol &&
+ "Associated symbol mismatch between sections");
return Section;
}
@@ -617,8 +681,8 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
}
}
-void TargetLoweringObjectFileMachO::emitModuleMetadata(
- MCStreamer &Streamer, Module &M, const TargetMachine &TM) const {
+void TargetLoweringObjectFileMachO::emitModuleMetadata(MCStreamer &Streamer,
+ Module &M) const {
// Emit the linker options if present.
if (auto *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) {
for (const auto &Option : LinkerOptions->operands()) {
@@ -727,6 +791,8 @@ MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal(
if (GO->isWeakForLinker()) {
if (Kind.isReadOnly())
return ConstTextCoalSection;
+ if (Kind.isReadOnlyWithRel())
+ return ConstDataCoalSection;
return DataCoalSection;
}
@@ -1040,7 +1106,7 @@ MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal(
Selection);
}
-static const char *getCOFFSectionNameForUniqueGlobal(SectionKind Kind) {
+static StringRef getCOFFSectionNameForUniqueGlobal(SectionKind Kind) {
if (Kind.isText())
return ".text";
if (Kind.isBSS())
@@ -1063,7 +1129,8 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal(
EmitUniquedSection = TM.getDataSections();
if ((EmitUniquedSection && !Kind.isCommon()) || GO->hasComdat()) {
- const char *Name = getCOFFSectionNameForUniqueGlobal(Kind);
+ SmallString<256> Name = getCOFFSectionNameForUniqueGlobal(Kind);
+
unsigned Characteristics = getCOFFSectionFlags(Kind, TM);
Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
@@ -1083,6 +1150,12 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal(
if (!ComdatGV->hasPrivateLinkage()) {
MCSymbol *Sym = TM.getSymbol(ComdatGV);
StringRef COMDATSymName = Sym->getName();
+
+ // Append "$symbol" to the section name when targeting mingw. The ld.bfd
+ // COFF linker will not properly handle comdats otherwise.
+ if (getTargetTriple().isWindowsGNUEnvironment())
+ raw_svector_ostream(Name) << '$' << COMDATSymName;
+
return getContext().getCOFFSection(Name, Characteristics, Kind,
COMDATSymName, Selection, UniqueID);
} else {
@@ -1140,17 +1213,18 @@ MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable(
StringRef COMDATSymName = Sym->getName();
SectionKind Kind = SectionKind::getReadOnly();
- const char *Name = getCOFFSectionNameForUniqueGlobal(Kind);
+ StringRef SecName = getCOFFSectionNameForUniqueGlobal(Kind);
unsigned Characteristics = getCOFFSectionFlags(Kind, TM);
Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
unsigned UniqueID = NextUniqueID++;
- return getContext().getCOFFSection(Name, Characteristics, Kind, COMDATSymName,
- COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE, UniqueID);
+ return getContext().getCOFFSection(
+ SecName, Characteristics, Kind, COMDATSymName,
+ COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE, UniqueID);
}
-void TargetLoweringObjectFileCOFF::emitModuleMetadata(
- MCStreamer &Streamer, Module &M, const TargetMachine &TM) const {
+void TargetLoweringObjectFileCOFF::emitModuleMetadata(MCStreamer &Streamer,
+ Module &M) const {
if (NamedMDNode *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) {
// Emit the linker options to the linker .drectve section. According to the
// spec, this section is a space-separated string containing flags for
@@ -1250,19 +1324,136 @@ void TargetLoweringObjectFileCOFF::emitLinkerFlagsForGlobal(
emitLinkerFlagsForGlobalCOFF(OS, GV, getTargetTriple(), getMangler());
}
+void TargetLoweringObjectFileCOFF::emitLinkerFlagsForUsed(
+ raw_ostream &OS, const GlobalValue *GV) const {
+ emitLinkerFlagsForUsedCOFF(OS, GV, getTargetTriple(), getMangler());
+}
+
+const MCExpr *TargetLoweringObjectFileCOFF::lowerRelativeReference(
+ const GlobalValue *LHS, const GlobalValue *RHS,
+ const TargetMachine &TM) const {
+ const Triple &T = TM.getTargetTriple();
+ if (!T.isKnownWindowsMSVCEnvironment() &&
+ !T.isWindowsItaniumEnvironment() &&
+ !T.isWindowsCoreCLREnvironment())
+ return nullptr;
+
+ // Our symbols should exist in address space zero, cowardly no-op if
+ // otherwise.
+ if (LHS->getType()->getPointerAddressSpace() != 0 ||
+ RHS->getType()->getPointerAddressSpace() != 0)
+ return nullptr;
+
+ // Both ptrtoint instructions must wrap global objects:
+ // - Only global variables are eligible for image relative relocations.
+ // - The subtrahend refers to the special symbol __ImageBase, a GlobalVariable.
+ // We expect __ImageBase to be a global variable without a section, externally
+ // defined.
+ //
+ // It should look something like this: @__ImageBase = external constant i8
+ if (!isa<GlobalObject>(LHS) || !isa<GlobalVariable>(RHS) ||
+ LHS->isThreadLocal() || RHS->isThreadLocal() ||
+ RHS->getName() != "__ImageBase" || !RHS->hasExternalLinkage() ||
+ cast<GlobalVariable>(RHS)->hasInitializer() || RHS->hasSection())
+ return nullptr;
+
+ return MCSymbolRefExpr::create(TM.getSymbol(LHS),
+ MCSymbolRefExpr::VK_COFF_IMGREL32,
+ getContext());
+}
+
+static std::string APIntToHexString(const APInt &AI) {
+ unsigned Width = (AI.getBitWidth() / 8) * 2;
+ std::string HexString = utohexstr(AI.getLimitedValue(), /*LowerCase=*/true);
+ unsigned Size = HexString.size();
+ assert(Width >= Size && "hex string is too large!");
+ HexString.insert(HexString.begin(), Width - Size, '0');
+
+ return HexString;
+}
+
+static std::string scalarConstantToHexString(const Constant *C) {
+ Type *Ty = C->getType();
+ if (isa<UndefValue>(C)) {
+ return APIntToHexString(APInt::getNullValue(Ty->getPrimitiveSizeInBits()));
+ } else if (const auto *CFP = dyn_cast<ConstantFP>(C)) {
+ return APIntToHexString(CFP->getValueAPF().bitcastToAPInt());
+ } else if (const auto *CI = dyn_cast<ConstantInt>(C)) {
+ return APIntToHexString(CI->getValue());
+ } else {
+ unsigned NumElements;
+ if (isa<VectorType>(Ty))
+ NumElements = Ty->getVectorNumElements();
+ else
+ NumElements = Ty->getArrayNumElements();
+ std::string HexString;
+ for (int I = NumElements - 1, E = -1; I != E; --I)
+ HexString += scalarConstantToHexString(C->getAggregateElement(I));
+ return HexString;
+ }
+}
+
+MCSection *TargetLoweringObjectFileCOFF::getSectionForConstant(
+ const DataLayout &DL, SectionKind Kind, const Constant *C,
+ unsigned &Align) const {
+ if (Kind.isMergeableConst() && C &&
+ getContext().getAsmInfo()->hasCOFFComdatConstants()) {
+ // This creates comdat sections with the given symbol name, but unless
+ // AsmPrinter::GetCPISymbol actually makes the symbol global, the symbol
+ // will be created with a null storage class, which makes GNU binutils
+ // error out.
+ const unsigned Characteristics = COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_LNK_COMDAT;
+ std::string COMDATSymName;
+ if (Kind.isMergeableConst4()) {
+ if (Align <= 4) {
+ COMDATSymName = "__real@" + scalarConstantToHexString(C);
+ Align = 4;
+ }
+ } else if (Kind.isMergeableConst8()) {
+ if (Align <= 8) {
+ COMDATSymName = "__real@" + scalarConstantToHexString(C);
+ Align = 8;
+ }
+ } else if (Kind.isMergeableConst16()) {
+ // FIXME: These may not be appropriate for non-x86 architectures.
+ if (Align <= 16) {
+ COMDATSymName = "__xmm@" + scalarConstantToHexString(C);
+ Align = 16;
+ }
+ } else if (Kind.isMergeableConst32()) {
+ if (Align <= 32) {
+ COMDATSymName = "__ymm@" + scalarConstantToHexString(C);
+ Align = 32;
+ }
+ }
+
+ if (!COMDATSymName.empty())
+ return getContext().getCOFFSection(".rdata", Characteristics, Kind,
+ COMDATSymName,
+ COFF::IMAGE_COMDAT_SELECT_ANY);
+ }
+
+ return TargetLoweringObjectFile::getSectionForConstant(DL, Kind, C, Align);
+}
+
+
//===----------------------------------------------------------------------===//
// Wasm
//===----------------------------------------------------------------------===//
-static void checkWasmComdat(const GlobalValue *GV) {
+static const Comdat *getWasmComdat(const GlobalValue *GV) {
const Comdat *C = GV->getComdat();
if (!C)
- return;
+ return nullptr;
- // TODO(sbc): At some point we may need COMDAT support but currently
- // they are not supported.
- report_fatal_error("WebAssembly doesn't support COMDATs, '" + C->getName() +
- "' cannot be lowered.");
+ if (C->getSelectionKind() != Comdat::Any)
+ report_fatal_error("WebAssembly COMDATs only support "
+ "SelectionKind::Any, '" + C->getName() + "' cannot be "
+ "lowered.");
+
+ return C;
}
static SectionKind getWasmKindForNamedSection(StringRef Name, SectionKind K) {
@@ -1277,17 +1468,32 @@ static SectionKind getWasmKindForNamedSection(StringRef Name, SectionKind K) {
MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+ // We don't support explicit section names for functions in the wasm object
+ // format. Each function has to be in its own unique section.
+ if (isa<Function>(GO)) {
+ return SelectSectionForGlobal(GO, Kind, TM);
+ }
+
StringRef Name = GO->getSection();
- checkWasmComdat(GO);
+
Kind = getWasmKindForNamedSection(Name, Kind);
- return getContext().getWasmSection(Name, Kind);
+
+ StringRef Group = "";
+ if (const Comdat *C = getWasmComdat(GO)) {
+ Group = C->getName();
+ }
+
+ return getContext().getWasmSection(Name, Kind, Group,
+ MCContext::GenericSectionID);
}
static MCSectionWasm *selectWasmSectionForGlobal(
MCContext &Ctx, const GlobalObject *GO, SectionKind Kind, Mangler &Mang,
const TargetMachine &TM, bool EmitUniqueSection, unsigned *NextUniqueID) {
StringRef Group = "";
- checkWasmComdat(GO);
+ if (const Comdat *C = getWasmComdat(GO)) {
+ Group = C->getName();
+ }
bool UniqueSectionNames = TM.getUniqueSectionNames();
SmallString<128> Name = getSectionPrefixForGlobal(Kind);
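
The new "CG Profile" emission above walks a list of (caller, callee, count) edges and drops any edge whose endpoint was dead-stripped; the following standalone sketch (CGEdge and emitCGProfile are invented) captures just that filtering step.

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

struct CGEdge {
  const std::string *From; // null when the caller was dead-stripped
  const std::string *To;   // null when the callee was dead-stripped
  uint64_t Count;
};

void emitCGProfile(const std::vector<CGEdge> &Edges) {
  for (const CGEdge &E : Edges) {
    if (!E.From || !E.To)
      continue; // mirrors the null-function skip in the hunk above
    std::cout << *E.From << " -> " << *E.To << " : " << E.Count << '\n';
  }
}
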
diff --git a/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp b/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp
index 3e6ad3eeef0f..3fca2f4ee4fe 100644
--- a/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -41,6 +41,7 @@
#include "llvm/Support/Threading.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/SymbolRewriter.h"
#include <cassert>
#include <string>
@@ -80,6 +81,9 @@ static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm",
cl::desc("Disable Machine LICM"));
static cl::opt<bool> DisableMachineSink("disable-machine-sink", cl::Hidden,
cl::desc("Disable Machine Sinking"));
+static cl::opt<bool> DisablePostRAMachineSink("disable-postra-machine-sink",
+ cl::Hidden,
+ cl::desc("Disable PostRA Machine Sinking"));
static cl::opt<bool> DisableLSR("disable-lsr", cl::Hidden,
cl::desc("Disable Loop Strength Reduction Pass"));
static cl::opt<bool> DisableConstantHoisting("disable-constant-hoisting",
@@ -94,10 +98,9 @@ static cl::opt<bool> EnableImplicitNullChecks(
"enable-implicit-null-checks",
cl::desc("Fold null checks into faulting memory operations"),
cl::init(false), cl::Hidden);
-static cl::opt<bool>
- EnableMergeICmps("enable-mergeicmps",
- cl::desc("Merge ICmp chains into a single memcmp"),
- cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableMergeICmps("disable-mergeicmps",
+ cl::desc("Disable MergeICmps Pass"),
+ cl::init(false), cl::Hidden);
static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden,
cl::desc("Print LLVM IR produced by the loop-reduce pass"));
static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden,
@@ -108,14 +111,16 @@ static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
cl::desc("Verify generated machine code"),
cl::init(false),
cl::ZeroOrMore);
-static cl::opt<bool> EnableMachineOutliner("enable-machine-outliner",
- cl::Hidden,
- cl::desc("Enable machine outliner"));
-static cl::opt<bool> EnableLinkOnceODROutlining(
- "enable-linkonceodr-outlining",
- cl::Hidden,
- cl::desc("Enable the machine outliner on linkonceodr functions"),
- cl::init(false));
+enum RunOutliner { AlwaysOutline, NeverOutline, TargetDefault };
+// Enable or disable the MachineOutliner.
+static cl::opt<RunOutliner> EnableMachineOutliner(
+ "enable-machine-outliner", cl::desc("Enable the machine outliner"),
+ cl::Hidden, cl::ValueOptional, cl::init(TargetDefault),
+ cl::values(clEnumValN(AlwaysOutline, "always",
+ "Run on all functions guaranteed to be beneficial"),
+ clEnumValN(NeverOutline, "never", "Disable all outlining"),
+ // Sentinel value for unspecified option.
+ clEnumValN(AlwaysOutline, "", "")));
// Enable or disable FastISel. Both options are needed, because
// FastISel is enabled by default with -fast, and we wish to be
// able to enable or disable fast-isel independently from -O0.
@@ -123,9 +128,9 @@ static cl::opt<cl::boolOrDefault>
EnableFastISelOption("fast-isel", cl::Hidden,
cl::desc("Enable the \"fast\" instruction selector"));
-static cl::opt<cl::boolOrDefault>
- EnableGlobalISel("global-isel", cl::Hidden,
- cl::desc("Enable the \"global\" instruction selector"));
+static cl::opt<cl::boolOrDefault> EnableGlobalISelOption(
+ "global-isel", cl::Hidden,
+ cl::desc("Enable the \"global\" instruction selector"));
static cl::opt<std::string> PrintMachineInstrs(
"print-machineinstrs", cl::ValueOptional, cl::desc("Print machine instrs"),
@@ -226,7 +231,7 @@ static IdentifyingPassPtr overridePass(AnalysisID StandardID,
if (StandardID == &TailDuplicateID)
return applyDisable(TargetID, DisableTailDuplicate);
- if (StandardID == &TargetPassConfig::EarlyTailDuplicateID)
+ if (StandardID == &EarlyTailDuplicateID)
return applyDisable(TargetID, DisableEarlyTailDup);
if (StandardID == &MachineBlockPlacementID)
@@ -241,18 +246,21 @@ static IdentifyingPassPtr overridePass(AnalysisID StandardID,
if (StandardID == &EarlyIfConverterID)
return applyDisable(TargetID, DisableEarlyIfConversion);
- if (StandardID == &MachineLICMID)
+ if (StandardID == &EarlyMachineLICMID)
return applyDisable(TargetID, DisableMachineLICM);
if (StandardID == &MachineCSEID)
return applyDisable(TargetID, DisableMachineCSE);
- if (StandardID == &TargetPassConfig::PostRAMachineLICMID)
+ if (StandardID == &MachineLICMID)
return applyDisable(TargetID, DisablePostRAMachineLICM);
if (StandardID == &MachineSinkingID)
return applyDisable(TargetID, DisableMachineSink);
+ if (StandardID == &PostRAMachineSinkingID)
+ return applyDisable(TargetID, DisablePostRAMachineSink);
+
if (StandardID == &MachineCopyPropagationID)
return applyDisable(TargetID, DisableCopyProp);
@@ -267,10 +275,6 @@ INITIALIZE_PASS(TargetPassConfig, "targetpassconfig",
"Target Pass Configuration", false, false)
char TargetPassConfig::ID = 0;
-// Pseudo Pass IDs.
-char TargetPassConfig::EarlyTailDuplicateID = 0;
-char TargetPassConfig::PostRAMachineLICMID = 0;
-
namespace {
struct InsertedPass {
@@ -366,10 +370,6 @@ TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm)
initializeBasicAAWrapperPassPass(*PassRegistry::getPassRegistry());
initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry());
- // Substitute Pseudo Pass IDs for real ones.
- substitutePass(&EarlyTailDuplicateID, &TailDuplicateID);
- substitutePass(&PostRAMachineLICMID, &MachineLICMID);
-
if (StringRef(PrintMachineInstrs.getValue()).equals(""))
TM.Options.PrintMachineCode = true;
@@ -604,7 +604,7 @@ void TargetPassConfig::addIRPasses() {
// loads and compares. ExpandMemCmpPass then tries to expand those calls
// into optimally-sized loads and compares. The transforms are enabled by a
// target lowering hook.
- if (EnableMergeICmps)
+ if (!DisableMergeICmps)
addPass(createMergeICmpsPass());
addPass(createExpandMemCmpPass());
}
@@ -662,6 +662,14 @@ void TargetPassConfig::addPassesToHandleExceptions() {
addPass(createWinEHPass());
addPass(createDwarfEHPass());
break;
+ case ExceptionHandling::Wasm:
+ // Wasm EH uses Windows EH instructions, but it does not need to demote PHIs
+ // on catchpads and cleanuppads because it does not outline them into
+ // funclets. Catchswitch blocks are not lowered in SelectionDAG, so we
+ // should remove PHIs there.
+ addPass(createWinEHPass(/*DemoteCatchSwitchPHIOnly=*/false));
+ addPass(createWasmEHPass());
+ break;
case ExceptionHandling::None:
addPass(createLowerInvokePass());
@@ -704,19 +712,18 @@ void TargetPassConfig::addISelPrepare() {
}
bool TargetPassConfig::addCoreISelPasses() {
- // Enable FastISel with -fast, but allow that to be overridden.
+ // Enable FastISel with -fast-isel, but allow that to be overridden.
TM->setO0WantsFastISel(EnableFastISelOption != cl::BOU_FALSE);
if (EnableFastISelOption == cl::BOU_TRUE ||
(TM->getOptLevel() == CodeGenOpt::None && TM->getO0WantsFastISel()))
TM->setFastISel(true);
- // Ask the target for an isel.
- // Enable GlobalISel if the target wants to, but allow that to be overriden.
+ // Ask the target for an instruction selector.
// Explicitly enabling fast-isel should override implicitly enabled
// global-isel.
- if (EnableGlobalISel == cl::BOU_TRUE ||
- (EnableGlobalISel == cl::BOU_UNSET && isGlobalISelEnabled() &&
- EnableFastISelOption != cl::BOU_TRUE)) {
+ if (EnableGlobalISelOption == cl::BOU_TRUE ||
+ (EnableGlobalISelOption == cl::BOU_UNSET &&
+ TM->Options.EnableGlobalISel && EnableFastISelOption != cl::BOU_TRUE)) {
TM->setFastISel(false);
if (addIRTranslator())
@@ -755,7 +762,7 @@ bool TargetPassConfig::addCoreISelPasses() {
}
bool TargetPassConfig::addISelPasses() {
- if (TM->Options.EmulatedTLS)
+ if (TM->useEmulatedTLS())
addPass(createLowerEmuTLSPass());
addPass(createPreISelIntrinsicLoweringPass());
@@ -844,8 +851,10 @@ void TargetPassConfig::addMachinePasses() {
addPostRegAlloc();
// Insert prolog/epilog code. Eliminate abstract frame index references...
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOpt::None) {
+ addPass(&PostRAMachineSinkingID);
addPass(&ShrinkWrapID);
+ }
// Prolog/Epilog inserter needs a TargetMachine to instantiate. But only
// do so if it hasn't been disabled, substituted, or overridden.
@@ -904,8 +913,14 @@ void TargetPassConfig::addMachinePasses() {
addPass(&XRayInstrumentationID, false);
addPass(&PatchableFunctionID, false);
- if (EnableMachineOutliner)
- PM->add(createMachineOutlinerPass(EnableLinkOnceODROutlining));
+ if (TM->Options.EnableMachineOutliner && getOptLevel() != CodeGenOpt::None &&
+ EnableMachineOutliner != NeverOutline) {
+ bool RunOnAllFunctions = (EnableMachineOutliner == AlwaysOutline);
+ bool AddOutliner = RunOnAllFunctions ||
+ TM->Options.SupportsDefaultOutlining;
+ if (AddOutliner)
+ addPass(createMachineOutlinerPass(RunOnAllFunctions));
+ }
// Add passes that directly emit MI after all other MI passes.
addPreEmitPass2();
@@ -941,7 +956,7 @@ void TargetPassConfig::addMachineSSAOptimization() {
// loop info, just like LICM and CSE below.
addILPOpts();
- addPass(&MachineLICMID, false);
+ addPass(&EarlyMachineLICMID, false);
addPass(&MachineCSEID, false);
addPass(&MachineSinkingID);
@@ -1090,10 +1105,14 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
// kill markers.
addPass(&StackSlotColoringID);
+ // Copy propagate to forward register uses and try to eliminate COPYs that
+ // were not coalesced.
+ addPass(&MachineCopyPropagationID);
+
// Run post-ra machine LICM to hoist reloads / remats.
//
// FIXME: can this move into MachineLateOptimization?
- addPass(&PostRAMachineLICMID);
+ addPass(&MachineLICMID);
}
}
@@ -1135,18 +1154,13 @@ void TargetPassConfig::addBlockPlacement() {
//===---------------------------------------------------------------------===//
/// GlobalISel Configuration
//===---------------------------------------------------------------------===//
-
-bool TargetPassConfig::isGlobalISelEnabled() const {
- return false;
-}
-
bool TargetPassConfig::isGlobalISelAbortEnabled() const {
if (EnableGlobalISelAbort.getNumOccurrences() > 0)
return EnableGlobalISelAbort == 1;
// When no abort behaviour is specified, we don't abort if the target says
// that GISel is enabled.
- return !isGlobalISelEnabled();
+ return !TM->Options.EnableGlobalISel;
}
bool TargetPassConfig::reportDiagnosticWhenGlobalISelFallback() const {
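
The machine-outliner gating added above combines four inputs; shouldAddOutliner below is a hypothetical restatement of that condition, using the same RunOutliner enumerators.

enum RunOutliner { AlwaysOutline, NeverOutline, TargetDefault };

// The pass is added only when outlining is enabled for the target, the opt
// level is above -O0, the user did not pass -enable-machine-outliner=never,
// and either the user forced "always" or the target declares default
// outlining support.
bool shouldAddOutliner(bool TargetEnables, bool OptLevelNonZero,
                       RunOutliner Opt, bool TargetSupportsDefault) {
  if (!TargetEnables || !OptLevelNonZero || Opt == NeverOutline)
    return false;
  bool RunOnAllFunctions = (Opt == AlwaysOutline);
  return RunOnAllFunctions || TargetSupportsDefault;
}
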
diff --git a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
index f03c3b8300f3..661dc18f7a85 100644
--- a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -19,15 +19,16 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Printable.h"
#include "llvm/Support/raw_ostream.h"
@@ -86,18 +87,24 @@ bool TargetRegisterInfo::checkAllSuperRegsMarked(const BitVector &RegisterSet,
namespace llvm {
Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI,
- unsigned SubIdx) {
- return Printable([Reg, TRI, SubIdx](raw_ostream &OS) {
+ unsigned SubIdx, const MachineRegisterInfo *MRI) {
+ return Printable([Reg, TRI, SubIdx, MRI](raw_ostream &OS) {
if (!Reg)
- OS << "%noreg";
+ OS << "$noreg";
else if (TargetRegisterInfo::isStackSlot(Reg))
OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg);
- else if (TargetRegisterInfo::isVirtualRegister(Reg))
- OS << '%' << TargetRegisterInfo::virtReg2Index(Reg);
+ else if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ StringRef Name = MRI ? MRI->getVRegName(Reg) : "";
+ if (Name != "") {
+ OS << '%' << Name;
+ } else {
+ OS << '%' << TargetRegisterInfo::virtReg2Index(Reg);
+ }
+ }
else if (!TRI)
- OS << '%' << "physreg" << Reg;
+ OS << '$' << "physreg" << Reg;
else if (Reg < TRI->getNumRegs()) {
- OS << '%';
+ OS << '$';
printLowerCase(TRI->getName(Reg), OS);
} else
llvm_unreachable("Register kind is unsupported.");
@@ -338,7 +345,7 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA,
return BestRC;
}
-/// \brief Check if the registers defined by the pair (RegisterClass, SubReg)
+/// Check if the registers defined by the pair (RegisterClass, SubReg)
/// share the same register file.
static bool shareSameRegisterFile(const TargetRegisterInfo &TRI,
const TargetRegisterClass *DefRC,
@@ -436,7 +443,8 @@ bool TargetRegisterInfo::needsStackRealignment(
if (F.hasFnAttribute("stackrealign") || requiresRealignment) {
if (canRealignStack(MF))
return true;
- DEBUG(dbgs() << "Can't realign function's stack: " << F.getName() << "\n");
+ LLVM_DEBUG(dbgs() << "Can't realign function's stack: " << F.getName()
+ << "\n");
}
return false;
}
@@ -450,6 +458,51 @@ bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0,
return true;
}
+unsigned TargetRegisterInfo::getRegSizeInBits(unsigned Reg,
+ const MachineRegisterInfo &MRI) const {
+ const TargetRegisterClass *RC{};
+ if (isPhysicalRegister(Reg)) {
+ // The size is not directly available for physical registers.
+ // Instead, we need to access a register class that contains Reg and
+ // get the size of that register class.
+ RC = getMinimalPhysRegClass(Reg);
+ } else {
+ LLT Ty = MRI.getType(Reg);
+ unsigned RegSize = Ty.isValid() ? Ty.getSizeInBits() : 0;
+ // If Reg is not a generic register, query the register class to
+ // get its size.
+ if (RegSize)
+ return RegSize;
+ // Since Reg is not a generic register, it must have a register class.
+ RC = MRI.getRegClass(Reg);
+ }
+ assert(RC && "Unable to deduce the register class");
+ return getRegSizeInBits(*RC);
+}
+
+unsigned
+TargetRegisterInfo::lookThruCopyLike(unsigned SrcReg,
+ const MachineRegisterInfo *MRI) const {
+ while (true) {
+ const MachineInstr *MI = MRI->getVRegDef(SrcReg);
+ if (!MI->isCopyLike())
+ return SrcReg;
+
+ unsigned CopySrcReg;
+ if (MI->isCopy())
+ CopySrcReg = MI->getOperand(1).getReg();
+ else {
+ assert(MI->isSubregToReg() && "Bad opcode for lookThruCopyLike");
+ CopySrcReg = MI->getOperand(2).getReg();
+ }
+
+ if (!isVirtualRegister(CopySrcReg))
+ return CopySrcReg;
+
+ SrcReg = CopySrcReg;
+ }
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD
void TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex,
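
lookThruCopyLike() above chases copy-like definitions; this simplified, self-contained model (a std::map of invented Def records instead of MachineRegisterInfo) shows the termination conditions.

#include <map>

struct Def {
  bool IsCopyLike;   // the defining instruction is COPY/SUBREG_TO_REG-like
  unsigned SrcReg;   // the register it copies from
  bool SrcIsVirtual; // whether that source is a virtual register
};

unsigned lookThruCopies(unsigned Reg, const std::map<unsigned, Def> &Defs) {
  while (true) {
    const Def &D = Defs.at(Reg); // every virtual reg has one def (SSA)
    if (!D.IsCopyLike)
      return Reg;                // found the real definition
    if (!D.SrcIsVirtual)
      return D.SrcReg;           // stop once a physical register is reached
    Reg = D.SrcReg;              // keep chasing through the copy chain
  }
}
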
diff --git a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
index 86dbf1b2aeab..3cff31ad4933 100644
--- a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
@@ -61,12 +61,10 @@ static unsigned lcm(unsigned A, unsigned B) {
return LCM;
}
-void TargetSchedModel::init(const MCSchedModel &sm,
- const TargetSubtargetInfo *sti,
- const TargetInstrInfo *tii) {
- SchedModel = sm;
- STI = sti;
- TII = tii;
+void TargetSchedModel::init(const TargetSubtargetInfo *TSInfo) {
+ STI = TSInfo;
+ SchedModel = TSInfo->getSchedModel();
+ TII = TSInfo->getInstrInfo();
STI->initInstrItins(InstrItins);
unsigned NumRes = SchedModel.getNumProcResourceKinds();
@@ -257,31 +255,19 @@ unsigned TargetSchedModel::computeOperandLatency(
unsigned
TargetSchedModel::computeInstrLatency(const MCSchedClassDesc &SCDesc) const {
- unsigned Latency = 0;
- for (unsigned DefIdx = 0, DefEnd = SCDesc.NumWriteLatencyEntries;
- DefIdx != DefEnd; ++DefIdx) {
- // Lookup the definition's write latency in SubtargetInfo.
- const MCWriteLatencyEntry *WLEntry =
- STI->getWriteLatencyEntry(&SCDesc, DefIdx);
- Latency = std::max(Latency, capLatency(WLEntry->Cycles));
- }
- return Latency;
+ return capLatency(MCSchedModel::computeInstrLatency(*STI, SCDesc));
}
unsigned TargetSchedModel::computeInstrLatency(unsigned Opcode) const {
assert(hasInstrSchedModel() && "Only call this function with a SchedModel");
-
unsigned SCIdx = TII->get(Opcode).getSchedClass();
- const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SCIdx);
-
- if (SCDesc->isValid() && !SCDesc->isVariant())
- return computeInstrLatency(*SCDesc);
+ return capLatency(SchedModel.computeInstrLatency(*STI, SCIdx));
+}
- if (SCDesc->isValid()) {
- assert (!SCDesc->isVariant() && "No MI sched latency: SCDesc->isVariant()");
- return computeInstrLatency(*SCDesc);
- }
- return 0;
+unsigned TargetSchedModel::computeInstrLatency(const MCInst &Inst) const {
+ if (hasInstrSchedModel())
+ return capLatency(SchedModel.computeInstrLatency(*STI, *TII, Inst));
+ return computeInstrLatency(Inst.getOpcode());
}
unsigned
@@ -336,71 +322,39 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
return 0;
}
-static Optional<double>
-getRThroughputFromItineraries(unsigned schedClass,
- const InstrItineraryData *IID){
- Optional<double> Throughput;
-
- for (const InstrStage *IS = IID->beginStage(schedClass),
- *E = IID->endStage(schedClass);
- IS != E; ++IS) {
- if (IS->getCycles()) {
- double Temp = countPopulation(IS->getUnits()) * 1.0 / IS->getCycles();
- Throughput = Throughput.hasValue()
- ? std::min(Throughput.getValue(), Temp)
- : Temp;
- }
- }
- if (Throughput.hasValue())
- // We need reciprocal throughput that's why we return such value.
- return 1 / Throughput.getValue();
- return Throughput;
-}
-
-static Optional<double>
-getRThroughputFromInstrSchedModel(const MCSchedClassDesc *SCDesc,
- const TargetSubtargetInfo *STI,
- const MCSchedModel &SchedModel) {
- Optional<double> Throughput;
-
- for (const MCWriteProcResEntry *WPR = STI->getWriteProcResBegin(SCDesc),
- *WEnd = STI->getWriteProcResEnd(SCDesc);
- WPR != WEnd; ++WPR) {
- if (WPR->Cycles) {
- unsigned NumUnits =
- SchedModel.getProcResource(WPR->ProcResourceIdx)->NumUnits;
- double Temp = NumUnits * 1.0 / WPR->Cycles;
- Throughput = Throughput.hasValue()
- ? std::min(Throughput.getValue(), Temp)
- : Temp;
- }
+double
+TargetSchedModel::computeReciprocalThroughput(const MachineInstr *MI) const {
+ if (hasInstrItineraries()) {
+ unsigned SchedClass = MI->getDesc().getSchedClass();
+ return MCSchedModel::getReciprocalThroughput(SchedClass,
+ *getInstrItineraries());
}
- if (Throughput.hasValue())
- // We need reciprocal throughput that's why we return such value.
- return 1 / Throughput.getValue();
- return Throughput;
-}
-Optional<double>
-TargetSchedModel::computeInstrRThroughput(const MachineInstr *MI) const {
- if (hasInstrItineraries())
- return getRThroughputFromItineraries(MI->getDesc().getSchedClass(),
- getInstrItineraries());
if (hasInstrSchedModel())
- return getRThroughputFromInstrSchedModel(resolveSchedClass(MI), STI,
- SchedModel);
- return Optional<double>();
+ return MCSchedModel::getReciprocalThroughput(*STI, *resolveSchedClass(MI));
+
+ return 0.0;
}
-Optional<double>
-TargetSchedModel::computeInstrRThroughput(unsigned Opcode) const {
+double
+TargetSchedModel::computeReciprocalThroughput(unsigned Opcode) const {
unsigned SchedClass = TII->get(Opcode).getSchedClass();
if (hasInstrItineraries())
- return getRThroughputFromItineraries(SchedClass, getInstrItineraries());
+ return MCSchedModel::getReciprocalThroughput(SchedClass,
+ *getInstrItineraries());
if (hasInstrSchedModel()) {
- const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass);
- if (SCDesc->isValid() && !SCDesc->isVariant())
- return getRThroughputFromInstrSchedModel(SCDesc, STI, SchedModel);
+ const MCSchedClassDesc &SCDesc = *SchedModel.getSchedClassDesc(SchedClass);
+ if (SCDesc.isValid() && !SCDesc.isVariant())
+ return MCSchedModel::getReciprocalThroughput(*STI, SCDesc);
}
- return Optional<double>();
+
+ return 0.0;
}
+
+double
+TargetSchedModel::computeReciprocalThroughput(const MCInst &MI) const {
+ if (hasInstrSchedModel())
+ return SchedModel.getReciprocalThroughput(*STI, *TII, MI);
+ return computeReciprocalThroughput(MI.getOpcode());
+}
+
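
The reciprocal-throughput helpers that replace the hand-rolled loops above compute, in essence, the bottleneck resource's cycles-per-unit. A sketch of that arithmetic, with an invented ResourceUse record standing in for MCWriteProcResEntry:

#include <algorithm>
#include <vector>

struct ResourceUse {
  unsigned Cycles;   // cycles the instruction occupies this resource
  unsigned NumUnits; // how many units of the resource exist
};

double reciprocalThroughput(const std::vector<ResourceUse> &Uses) {
  // The bottleneck is the resource with the largest cycles-per-unit ratio;
  // its reciprocal throughput bounds the whole instruction. 0.0 stands in
  // for "unknown", matching the rewritten interface above.
  double RThroughput = 0.0;
  for (const ResourceUse &U : Uses)
    if (U.Cycles)
      RThroughput = std::max(RThroughput, double(U.Cycles) / U.NumUnits);
  return RThroughput;
}
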
diff --git a/contrib/llvm/lib/CodeGen/TargetSubtargetInfo.cpp b/contrib/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
index 8693f344f9be..fa29c05fd6c2 100644
--- a/contrib/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
@@ -67,18 +67,15 @@ bool TargetSubtargetInfo::useAA() const {
return false;
}
-static std::string createSchedInfoStr(unsigned Latency,
- Optional<double> RThroughput) {
+static std::string createSchedInfoStr(unsigned Latency, double RThroughput) {
static const char *SchedPrefix = " sched: [";
std::string Comment;
raw_string_ostream CS(Comment);
- if (Latency > 0 && RThroughput.hasValue())
- CS << SchedPrefix << Latency << format(":%2.2f", RThroughput.getValue())
+ if (RThroughput != 0.0)
+ CS << SchedPrefix << Latency << format(":%2.2f", RThroughput)
<< "]";
- else if (Latency > 0)
+ else
CS << SchedPrefix << Latency << ":?]";
- else if (RThroughput.hasValue())
- CS << SchedPrefix << "?:" << RThroughput.getValue() << "]";
CS.flush();
return Comment;
}
@@ -90,9 +87,9 @@ std::string TargetSubtargetInfo::getSchedInfoStr(const MachineInstr &MI) const {
// We don't cache TSchedModel because it depends on TargetInstrInfo
// that could be changed during the compilation
TargetSchedModel TSchedModel;
- TSchedModel.init(getSchedModel(), this, getInstrInfo());
+ TSchedModel.init(this);
unsigned Latency = TSchedModel.computeInstrLatency(&MI);
- Optional<double> RThroughput = TSchedModel.computeInstrRThroughput(&MI);
+ double RThroughput = TSchedModel.computeReciprocalThroughput(&MI);
return createSchedInfoStr(Latency, RThroughput);
}
@@ -101,17 +98,19 @@ std::string TargetSubtargetInfo::getSchedInfoStr(MCInst const &MCI) const {
// We don't cache TSchedModel because it depends on TargetInstrInfo
// that could be changed during the compilation
TargetSchedModel TSchedModel;
- TSchedModel.init(getSchedModel(), this, getInstrInfo());
+ TSchedModel.init(this);
unsigned Latency;
if (TSchedModel.hasInstrSchedModel())
- Latency = TSchedModel.computeInstrLatency(MCI.getOpcode());
+ Latency = TSchedModel.computeInstrLatency(MCI);
else if (TSchedModel.hasInstrItineraries()) {
auto *ItinData = TSchedModel.getInstrItineraries();
Latency = ItinData->getStageLatency(
getInstrInfo()->get(MCI.getOpcode()).getSchedClass());
} else
return std::string();
- Optional<double> RThroughput =
- TSchedModel.computeInstrRThroughput(MCI.getOpcode());
+ double RThroughput = TSchedModel.computeReciprocalThroughput(MCI);
return createSchedInfoStr(Latency, RThroughput);
}
+
+void TargetSubtargetInfo::mirFileLoaded(MachineFunction &MF) const {
+}
diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 774b76f84b7f..0ca435016ead 100644
--- a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -290,8 +290,8 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
unsigned NumVisited = 0;
for (MachineInstr &OtherMI : make_range(std::next(OldPos), KillPos)) {
- // DBG_VALUE cannot be counted against the limit.
- if (OtherMI.isDebugValue())
+ // Debug instructions cannot be counted against the limit.
+ if (OtherMI.isDebugInstr())
continue;
if (NumVisited > 30) // FIXME: Arbitrary limit to reduce compile time cost.
return false;
@@ -685,15 +685,15 @@ bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI,
unsigned RegCIdx,
unsigned Dist) {
unsigned RegC = MI->getOperand(RegCIdx).getReg();
- DEBUG(dbgs() << "2addr: COMMUTING : " << *MI);
+ LLVM_DEBUG(dbgs() << "2addr: COMMUTING : " << *MI);
MachineInstr *NewMI = TII->commuteInstruction(*MI, false, RegBIdx, RegCIdx);
if (NewMI == nullptr) {
- DEBUG(dbgs() << "2addr: COMMUTING FAILED!\n");
+ LLVM_DEBUG(dbgs() << "2addr: COMMUTING FAILED!\n");
return false;
}
- DEBUG(dbgs() << "2addr: COMMUTED TO: " << *NewMI);
+ LLVM_DEBUG(dbgs() << "2addr: COMMUTED TO: " << *NewMI);
assert(NewMI == MI &&
"TargetInstrInfo::commuteInstruction() should not return a new "
"instruction unless it was requested.");
@@ -740,8 +740,8 @@ TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi,
if (!NewMI)
return false;
- DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
- DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);
+ LLVM_DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
+ LLVM_DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);
bool Sunk = false;
if (LIS)
@@ -940,8 +940,8 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator KillPos = KillMI;
++KillPos;
for (MachineInstr &OtherMI : make_range(End, KillPos)) {
- // DBG_VALUE cannot be counted against the limit.
- if (OtherMI.isDebugValue())
+ // Debug instructions cannot be counted against the limit.
+ if (OtherMI.isDebugInstr())
continue;
if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
return false;
@@ -985,7 +985,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
}
// Move debug info as well.
- while (Begin != MBB->begin() && std::prev(Begin)->isDebugValue())
+ while (Begin != MBB->begin() && std::prev(Begin)->isDebugInstr())
--Begin;
nmi = End;
@@ -1014,7 +1014,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
LV->addVirtualRegisterKilled(Reg, *MI);
}
- DEBUG(dbgs() << "\trescheduled below kill: " << *KillMI);
+ LLVM_DEBUG(dbgs() << "\trescheduled below kill: " << *KillMI);
return true;
}
@@ -1114,8 +1114,8 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
unsigned NumVisited = 0;
for (MachineInstr &OtherMI :
make_range(mi, MachineBasicBlock::iterator(KillMI))) {
- // DBG_VALUE cannot be counted against the limit.
- if (OtherMI.isDebugValue())
+ // Debug instructions cannot be counted against the limit.
+ if (OtherMI.isDebugInstr())
continue;
if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
return false;
@@ -1162,11 +1162,11 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
// Move the old kill above MI, don't forget to move debug info as well.
MachineBasicBlock::iterator InsertPos = mi;
- while (InsertPos != MBB->begin() && std::prev(InsertPos)->isDebugValue())
+ while (InsertPos != MBB->begin() && std::prev(InsertPos)->isDebugInstr())
--InsertPos;
MachineBasicBlock::iterator From = KillMI;
MachineBasicBlock::iterator To = std::next(From);
- while (std::prev(From)->isDebugValue())
+ while (std::prev(From)->isDebugInstr())
--From;
MBB->splice(InsertPos, MBB, From, To);
@@ -1181,7 +1181,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
LV->addVirtualRegisterKilled(Reg, *MI);
}
- DEBUG(dbgs() << "\trescheduled kill: " << *KillMI);
+ LLVM_DEBUG(dbgs() << "\trescheduled kill: " << *KillMI);
return true;
}
@@ -1205,6 +1205,7 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI,
if (!MI->isCommutable())
return false;
+ bool MadeChange = false;
unsigned DstOpReg = MI->getOperand(DstOpIdx).getReg();
unsigned BaseOpReg = MI->getOperand(BaseOpIdx).getReg();
unsigned OpsNum = MI->getDesc().getNumOperands();
@@ -1223,8 +1224,8 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI,
// If OtherOp dies but BaseOp does not, swap the OtherOp and BaseOp
// operands. This makes the live ranges of DstOp and OtherOp joinable.
- bool DoCommute =
- !BaseOpKilled && isKilled(*MI, OtherOpReg, MRI, TII, LIS, false);
+ bool OtherOpKilled = isKilled(*MI, OtherOpReg, MRI, TII, LIS, false);
+ bool DoCommute = !BaseOpKilled && OtherOpKilled;
if (!DoCommute &&
isProfitableToCommute(DstOpReg, BaseOpReg, OtherOpReg, MI, Dist)) {
@@ -1235,13 +1236,21 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI,
// If it's profitable to commute, try to do so.
if (DoCommute && commuteInstruction(MI, DstOpIdx, BaseOpIdx, OtherOpIdx,
Dist)) {
+ MadeChange = true;
++NumCommuted;
- if (AggressiveCommute)
+ if (AggressiveCommute) {
++NumAggrCommuted;
- return true;
+      // There might be more than two commutable operands; update BaseOp and
+ // continue scanning.
+ BaseOpReg = OtherOpReg;
+ BaseOpKilled = OtherOpKilled;
+ continue;
+ }
+ // If this was a commute based on kill, we won't do better continuing.
+ return MadeChange;
}
}
- return false;
+ return MadeChange;
}
/// For the case where an instruction has a single pair of tied register
@@ -1343,7 +1352,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
const MCInstrDesc &UnfoldMCID = TII->get(NewOpc);
if (UnfoldMCID.getNumDefs() == 1) {
// Unfold the load.
- DEBUG(dbgs() << "2addr: UNFOLDING: " << MI);
+ LLVM_DEBUG(dbgs() << "2addr: UNFOLDING: " << MI);
const TargetRegisterClass *RC =
TRI->getAllocatableClass(
TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, *MF));
@@ -1352,7 +1361,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
if (!TII->unfoldMemoryOperand(*MF, MI, Reg,
/*UnfoldLoad=*/true,
/*UnfoldStore=*/false, NewMIs)) {
- DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
+ LLVM_DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
return false;
}
assert(NewMIs.size() == 2 &&
@@ -1365,8 +1374,8 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
MBB->insert(mi, NewMIs[0]);
MBB->insert(mi, NewMIs[1]);
- DEBUG(dbgs() << "2addr: NEW LOAD: " << *NewMIs[0]
- << "2addr: NEW INST: " << *NewMIs[1]);
+ LLVM_DEBUG(dbgs() << "2addr: NEW LOAD: " << *NewMIs[0]
+ << "2addr: NEW INST: " << *NewMIs[1]);
// Transform the instruction, now that it no longer has a load.
unsigned NewDstIdx = NewMIs[1]->findRegisterDefOperandIdx(regA);
@@ -1431,7 +1440,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
// Transforming didn't eliminate the tie and didn't lead to an
// improvement. Clean up the unfolded instructions and keep the
// original.
- DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
+ LLVM_DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
NewMIs[0]->eraseFromParent();
NewMIs[1]->eraseFromParent();
}
@@ -1475,7 +1484,7 @@ collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) {
MRI->constrainRegClass(DstReg, RC);
SrcMO.setReg(DstReg);
SrcMO.setSubReg(0);
- DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI);
+ LLVM_DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI);
continue;
}
TiedOperands[SrcReg].push_back(std::make_pair(SrcIdx, DstIdx));
@@ -1574,7 +1583,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
}
}
- DEBUG(dbgs() << "\t\tprepend:\t" << *MIB);
+ LLVM_DEBUG(dbgs() << "\t\tprepend:\t" << *MIB);
MachineOperand &MO = MI->getOperand(SrcIdx);
assert(MO.isReg() && MO.getReg() == RegB && MO.isUse() &&
@@ -1668,9 +1677,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
bool MadeChange = false;
- DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n");
- DEBUG(dbgs() << "********** Function: "
- << MF->getName() << '\n');
+ LLVM_DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n");
+ LLVM_DEBUG(dbgs() << "********** Function: " << MF->getName() << '\n');
// This pass takes the function out of SSA form.
MRI->leaveSSA();
@@ -1690,7 +1698,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
MachineBasicBlock::iterator nmi = std::next(mi);
// Don't revisit an instruction previously converted by target. It may
// contain undef register operands (%noreg), which are not handled.
- if (mi->isDebugValue() || SunkInstrs.count(&*mi)) {
+ if (mi->isDebugInstr() || SunkInstrs.count(&*mi)) {
mi = nmi;
continue;
}
@@ -1713,7 +1721,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
++NumTwoAddressInstrs;
MadeChange = true;
- DEBUG(dbgs() << '\t' << *mi);
+ LLVM_DEBUG(dbgs() << '\t' << *mi);
// If the instruction has a single pair of tied operands, try some
// transformations that may either eliminate the tied operands or
@@ -1740,7 +1748,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
// Now iterate over the information collected above.
for (auto &TO : TiedOperands) {
processTiedPairs(&*mi, TO.second, Dist);
- DEBUG(dbgs() << "\t\trewrite to:\t" << *mi);
+ LLVM_DEBUG(dbgs() << "\t\trewrite to:\t" << *mi);
}
// Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form.
@@ -1754,7 +1762,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
mi->getOperand(0).setIsUndef(mi->getOperand(1).isUndef());
mi->RemoveOperand(1);
mi->setDesc(TII->get(TargetOpcode::COPY));
- DEBUG(dbgs() << "\t\tconvert to:\t" << *mi);
+ LLVM_DEBUG(dbgs() << "\t\tconvert to:\t" << *mi);
}
// Clear TiedOperands here instead of at the top of the loop
@@ -1787,7 +1795,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
if (MI.getOperand(0).getSubReg() ||
TargetRegisterInfo::isPhysicalRegister(DstReg) ||
!(MI.getNumOperands() & 1)) {
- DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << MI);
+ LLVM_DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << MI);
llvm_unreachable(nullptr);
}
@@ -1838,19 +1846,19 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
if (LV && isKill && !TargetRegisterInfo::isPhysicalRegister(SrcReg))
LV->replaceKillInstruction(SrcReg, MI, *CopyMI);
- DEBUG(dbgs() << "Inserted: " << *CopyMI);
+ LLVM_DEBUG(dbgs() << "Inserted: " << *CopyMI);
}
MachineBasicBlock::iterator EndMBBI =
std::next(MachineBasicBlock::iterator(MI));
if (!DefEmitted) {
- DEBUG(dbgs() << "Turned: " << MI << " into an IMPLICIT_DEF");
+ LLVM_DEBUG(dbgs() << "Turned: " << MI << " into an IMPLICIT_DEF");
MI.setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
for (int j = MI.getNumOperands() - 1, ee = 0; j > ee; --j)
MI.RemoveOperand(j);
} else {
- DEBUG(dbgs() << "Eliminated: " << MI);
+ LLVM_DEBUG(dbgs() << "Eliminated: " << MI);
MI.eraseFromParent();
}
diff --git a/contrib/llvm/lib/CodeGen/ValueTypes.cpp b/contrib/llvm/lib/CodeGen/ValueTypes.cpp
new file mode 100644
index 000000000000..adb7075de651
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ValueTypes.cpp
@@ -0,0 +1,321 @@
+//===----------- ValueTypes.cpp - Implementation of EVT methods -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+EVT EVT::changeExtendedTypeToInteger() const {
+ LLVMContext &Context = LLVMTy->getContext();
+ return getIntegerVT(Context, getSizeInBits());
+}
+
+EVT EVT::changeExtendedVectorElementTypeToInteger() const {
+ LLVMContext &Context = LLVMTy->getContext();
+ EVT IntTy = getIntegerVT(Context, getScalarSizeInBits());
+ return getVectorVT(Context, IntTy, getVectorNumElements());
+}
+
+EVT EVT::getExtendedIntegerVT(LLVMContext &Context, unsigned BitWidth) {
+ EVT VT;
+ VT.LLVMTy = IntegerType::get(Context, BitWidth);
+ assert(VT.isExtended() && "Type is not extended!");
+ return VT;
+}
+
+EVT EVT::getExtendedVectorVT(LLVMContext &Context, EVT VT,
+ unsigned NumElements) {
+ EVT ResultVT;
+ ResultVT.LLVMTy = VectorType::get(VT.getTypeForEVT(Context), NumElements);
+ assert(ResultVT.isExtended() && "Type is not extended!");
+ return ResultVT;
+}
+
+bool EVT::isExtendedFloatingPoint() const {
+ assert(isExtended() && "Type is not extended!");
+ return LLVMTy->isFPOrFPVectorTy();
+}
+
+bool EVT::isExtendedInteger() const {
+ assert(isExtended() && "Type is not extended!");
+ return LLVMTy->isIntOrIntVectorTy();
+}
+
+bool EVT::isExtendedScalarInteger() const {
+ assert(isExtended() && "Type is not extended!");
+ return LLVMTy->isIntegerTy();
+}
+
+bool EVT::isExtendedVector() const {
+ assert(isExtended() && "Type is not extended!");
+ return LLVMTy->isVectorTy();
+}
+
+bool EVT::isExtended16BitVector() const {
+ return isExtendedVector() && getExtendedSizeInBits() == 16;
+}
+
+bool EVT::isExtended32BitVector() const {
+ return isExtendedVector() && getExtendedSizeInBits() == 32;
+}
+
+bool EVT::isExtended64BitVector() const {
+ return isExtendedVector() && getExtendedSizeInBits() == 64;
+}
+
+bool EVT::isExtended128BitVector() const {
+ return isExtendedVector() && getExtendedSizeInBits() == 128;
+}
+
+bool EVT::isExtended256BitVector() const {
+ return isExtendedVector() && getExtendedSizeInBits() == 256;
+}
+
+bool EVT::isExtended512BitVector() const {
+ return isExtendedVector() && getExtendedSizeInBits() == 512;
+}
+
+bool EVT::isExtended1024BitVector() const {
+ return isExtendedVector() && getExtendedSizeInBits() == 1024;
+}
+
+bool EVT::isExtended2048BitVector() const {
+ return isExtendedVector() && getExtendedSizeInBits() == 2048;
+}
+
+EVT EVT::getExtendedVectorElementType() const {
+ assert(isExtended() && "Type is not extended!");
+ return EVT::getEVT(cast<VectorType>(LLVMTy)->getElementType());
+}
+
+unsigned EVT::getExtendedVectorNumElements() const {
+ assert(isExtended() && "Type is not extended!");
+ return cast<VectorType>(LLVMTy)->getNumElements();
+}
+
+unsigned EVT::getExtendedSizeInBits() const {
+ assert(isExtended() && "Type is not extended!");
+ if (IntegerType *ITy = dyn_cast<IntegerType>(LLVMTy))
+ return ITy->getBitWidth();
+ if (VectorType *VTy = dyn_cast<VectorType>(LLVMTy))
+ return VTy->getBitWidth();
+ llvm_unreachable("Unrecognized extended type!");
+}
+
+/// getEVTString - This function returns value type as a string, e.g. "i32".
+std::string EVT::getEVTString() const {
+ switch (V.SimpleTy) {
+ default:
+ if (isVector())
+ return "v" + utostr(getVectorNumElements()) +
+ getVectorElementType().getEVTString();
+ if (isInteger())
+ return "i" + utostr(getSizeInBits());
+ llvm_unreachable("Invalid EVT!");
+ case MVT::i1: return "i1";
+ case MVT::i8: return "i8";
+ case MVT::i16: return "i16";
+ case MVT::i32: return "i32";
+ case MVT::i64: return "i64";
+ case MVT::i128: return "i128";
+ case MVT::f16: return "f16";
+ case MVT::f32: return "f32";
+ case MVT::f64: return "f64";
+ case MVT::f80: return "f80";
+ case MVT::f128: return "f128";
+ case MVT::ppcf128: return "ppcf128";
+ case MVT::isVoid: return "isVoid";
+ case MVT::Other: return "ch";
+ case MVT::Glue: return "glue";
+ case MVT::x86mmx: return "x86mmx";
+ case MVT::v1i1: return "v1i1";
+ case MVT::v2i1: return "v2i1";
+ case MVT::v4i1: return "v4i1";
+ case MVT::v8i1: return "v8i1";
+ case MVT::v16i1: return "v16i1";
+ case MVT::v32i1: return "v32i1";
+ case MVT::v64i1: return "v64i1";
+ case MVT::v128i1: return "v128i1";
+ case MVT::v512i1: return "v512i1";
+ case MVT::v1024i1: return "v1024i1";
+ case MVT::v1i8: return "v1i8";
+ case MVT::v2i8: return "v2i8";
+ case MVT::v4i8: return "v4i8";
+ case MVT::v8i8: return "v8i8";
+ case MVT::v16i8: return "v16i8";
+ case MVT::v32i8: return "v32i8";
+ case MVT::v64i8: return "v64i8";
+ case MVT::v128i8: return "v128i8";
+ case MVT::v256i8: return "v256i8";
+ case MVT::v1i16: return "v1i16";
+ case MVT::v2i16: return "v2i16";
+ case MVT::v4i16: return "v4i16";
+ case MVT::v8i16: return "v8i16";
+ case MVT::v16i16: return "v16i16";
+ case MVT::v32i16: return "v32i16";
+ case MVT::v64i16: return "v64i16";
+ case MVT::v128i16: return "v128i16";
+ case MVT::v1i32: return "v1i32";
+ case MVT::v2i32: return "v2i32";
+ case MVT::v4i32: return "v4i32";
+ case MVT::v8i32: return "v8i32";
+ case MVT::v16i32: return "v16i32";
+ case MVT::v32i32: return "v32i32";
+ case MVT::v64i32: return "v64i32";
+ case MVT::v1i64: return "v1i64";
+ case MVT::v2i64: return "v2i64";
+ case MVT::v4i64: return "v4i64";
+ case MVT::v8i64: return "v8i64";
+ case MVT::v16i64: return "v16i64";
+ case MVT::v32i64: return "v32i64";
+ case MVT::v1i128: return "v1i128";
+ case MVT::v1f32: return "v1f32";
+ case MVT::v2f32: return "v2f32";
+ case MVT::v2f16: return "v2f16";
+ case MVT::v4f16: return "v4f16";
+ case MVT::v8f16: return "v8f16";
+ case MVT::v4f32: return "v4f32";
+ case MVT::v8f32: return "v8f32";
+ case MVT::v16f32: return "v16f32";
+ case MVT::v1f64: return "v1f64";
+ case MVT::v2f64: return "v2f64";
+ case MVT::v4f64: return "v4f64";
+ case MVT::v8f64: return "v8f64";
+ case MVT::Metadata:return "Metadata";
+ case MVT::Untyped: return "Untyped";
+ case MVT::ExceptRef: return "ExceptRef";
+ }
+}
+
+/// getTypeForEVT - This method returns an LLVM type corresponding to the
+/// specified EVT. For integer types, this returns an unsigned type. Note
+/// that this will abort for types that cannot be represented.
+Type *EVT::getTypeForEVT(LLVMContext &Context) const {
+ switch (V.SimpleTy) {
+ default:
+ assert(isExtended() && "Type is not extended!");
+ return LLVMTy;
+ case MVT::isVoid: return Type::getVoidTy(Context);
+ case MVT::i1: return Type::getInt1Ty(Context);
+ case MVT::i8: return Type::getInt8Ty(Context);
+ case MVT::i16: return Type::getInt16Ty(Context);
+ case MVT::i32: return Type::getInt32Ty(Context);
+ case MVT::i64: return Type::getInt64Ty(Context);
+ case MVT::i128: return IntegerType::get(Context, 128);
+ case MVT::f16: return Type::getHalfTy(Context);
+ case MVT::f32: return Type::getFloatTy(Context);
+ case MVT::f64: return Type::getDoubleTy(Context);
+ case MVT::f80: return Type::getX86_FP80Ty(Context);
+ case MVT::f128: return Type::getFP128Ty(Context);
+ case MVT::ppcf128: return Type::getPPC_FP128Ty(Context);
+ case MVT::x86mmx: return Type::getX86_MMXTy(Context);
+ case MVT::v1i1: return VectorType::get(Type::getInt1Ty(Context), 1);
+ case MVT::v2i1: return VectorType::get(Type::getInt1Ty(Context), 2);
+ case MVT::v4i1: return VectorType::get(Type::getInt1Ty(Context), 4);
+ case MVT::v8i1: return VectorType::get(Type::getInt1Ty(Context), 8);
+ case MVT::v16i1: return VectorType::get(Type::getInt1Ty(Context), 16);
+ case MVT::v32i1: return VectorType::get(Type::getInt1Ty(Context), 32);
+ case MVT::v64i1: return VectorType::get(Type::getInt1Ty(Context), 64);
+ case MVT::v128i1: return VectorType::get(Type::getInt1Ty(Context), 128);
+ case MVT::v512i1: return VectorType::get(Type::getInt1Ty(Context), 512);
+ case MVT::v1024i1: return VectorType::get(Type::getInt1Ty(Context), 1024);
+ case MVT::v1i8: return VectorType::get(Type::getInt8Ty(Context), 1);
+ case MVT::v2i8: return VectorType::get(Type::getInt8Ty(Context), 2);
+ case MVT::v4i8: return VectorType::get(Type::getInt8Ty(Context), 4);
+ case MVT::v8i8: return VectorType::get(Type::getInt8Ty(Context), 8);
+ case MVT::v16i8: return VectorType::get(Type::getInt8Ty(Context), 16);
+ case MVT::v32i8: return VectorType::get(Type::getInt8Ty(Context), 32);
+ case MVT::v64i8: return VectorType::get(Type::getInt8Ty(Context), 64);
+ case MVT::v128i8: return VectorType::get(Type::getInt8Ty(Context), 128);
+ case MVT::v256i8: return VectorType::get(Type::getInt8Ty(Context), 256);
+ case MVT::v1i16: return VectorType::get(Type::getInt16Ty(Context), 1);
+ case MVT::v2i16: return VectorType::get(Type::getInt16Ty(Context), 2);
+ case MVT::v4i16: return VectorType::get(Type::getInt16Ty(Context), 4);
+ case MVT::v8i16: return VectorType::get(Type::getInt16Ty(Context), 8);
+ case MVT::v16i16: return VectorType::get(Type::getInt16Ty(Context), 16);
+ case MVT::v32i16: return VectorType::get(Type::getInt16Ty(Context), 32);
+ case MVT::v64i16: return VectorType::get(Type::getInt16Ty(Context), 64);
+ case MVT::v128i16: return VectorType::get(Type::getInt16Ty(Context), 128);
+ case MVT::v1i32: return VectorType::get(Type::getInt32Ty(Context), 1);
+ case MVT::v2i32: return VectorType::get(Type::getInt32Ty(Context), 2);
+ case MVT::v4i32: return VectorType::get(Type::getInt32Ty(Context), 4);
+ case MVT::v8i32: return VectorType::get(Type::getInt32Ty(Context), 8);
+ case MVT::v16i32: return VectorType::get(Type::getInt32Ty(Context), 16);
+ case MVT::v32i32: return VectorType::get(Type::getInt32Ty(Context), 32);
+ case MVT::v64i32: return VectorType::get(Type::getInt32Ty(Context), 64);
+ case MVT::v1i64: return VectorType::get(Type::getInt64Ty(Context), 1);
+ case MVT::v2i64: return VectorType::get(Type::getInt64Ty(Context), 2);
+ case MVT::v4i64: return VectorType::get(Type::getInt64Ty(Context), 4);
+ case MVT::v8i64: return VectorType::get(Type::getInt64Ty(Context), 8);
+ case MVT::v16i64: return VectorType::get(Type::getInt64Ty(Context), 16);
+ case MVT::v32i64: return VectorType::get(Type::getInt64Ty(Context), 32);
+ case MVT::v1i128: return VectorType::get(Type::getInt128Ty(Context), 1);
+ case MVT::v2f16: return VectorType::get(Type::getHalfTy(Context), 2);
+ case MVT::v4f16: return VectorType::get(Type::getHalfTy(Context), 4);
+ case MVT::v8f16: return VectorType::get(Type::getHalfTy(Context), 8);
+ case MVT::v1f32: return VectorType::get(Type::getFloatTy(Context), 1);
+ case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2);
+ case MVT::v4f32: return VectorType::get(Type::getFloatTy(Context), 4);
+ case MVT::v8f32: return VectorType::get(Type::getFloatTy(Context), 8);
+ case MVT::v16f32: return VectorType::get(Type::getFloatTy(Context), 16);
+ case MVT::v1f64: return VectorType::get(Type::getDoubleTy(Context), 1);
+ case MVT::v2f64: return VectorType::get(Type::getDoubleTy(Context), 2);
+ case MVT::v4f64: return VectorType::get(Type::getDoubleTy(Context), 4);
+ case MVT::v8f64: return VectorType::get(Type::getDoubleTy(Context), 8);
+ case MVT::Metadata: return Type::getMetadataTy(Context);
+ }
+}
+
+/// Return the value type corresponding to the specified type. This returns all
+/// pointers as MVT::iPTR. If HandleUnknown is true, unknown types are returned
+/// as Other, otherwise they are invalid.
+MVT MVT::getVT(Type *Ty, bool HandleUnknown){
+ switch (Ty->getTypeID()) {
+ default:
+ if (HandleUnknown) return MVT(MVT::Other);
+ llvm_unreachable("Unknown type!");
+ case Type::VoidTyID:
+ return MVT::isVoid;
+ case Type::IntegerTyID:
+ return getIntegerVT(cast<IntegerType>(Ty)->getBitWidth());
+ case Type::HalfTyID: return MVT(MVT::f16);
+ case Type::FloatTyID: return MVT(MVT::f32);
+ case Type::DoubleTyID: return MVT(MVT::f64);
+ case Type::X86_FP80TyID: return MVT(MVT::f80);
+ case Type::X86_MMXTyID: return MVT(MVT::x86mmx);
+ case Type::FP128TyID: return MVT(MVT::f128);
+ case Type::PPC_FP128TyID: return MVT(MVT::ppcf128);
+ case Type::PointerTyID: return MVT(MVT::iPTR);
+ case Type::VectorTyID: {
+ VectorType *VTy = cast<VectorType>(Ty);
+ return getVectorVT(
+ getVT(VTy->getElementType(), false), VTy->getNumElements());
+ }
+ }
+}
+
+/// getEVT - Return the value type corresponding to the specified type. This
+/// returns all pointers as MVT::iPTR. If HandleUnknown is true, unknown types
+/// are returned as Other, otherwise they are invalid.
+EVT EVT::getEVT(Type *Ty, bool HandleUnknown){
+ switch (Ty->getTypeID()) {
+ default:
+ return MVT::getVT(Ty, HandleUnknown);
+ case Type::IntegerTyID:
+ return getIntegerVT(Ty->getContext(), cast<IntegerType>(Ty)->getBitWidth());
+ case Type::VectorTyID: {
+ VectorType *VTy = cast<VectorType>(Ty);
+ return getVectorVT(Ty->getContext(), getEVT(VTy->getElementType(), false),
+ VTy->getNumElements());
+ }
+ }
+}
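The new ValueTypes.cpp above is mostly mechanical MVT/EVT switch tables. A minimal usage sketch follows, relying only on APIs used or defined in this file; the function name is illustrative, not part of the patch.

    #include "llvm/CodeGen/ValueTypes.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"
    using namespace llvm;

    void evtRoundTrip(LLVMContext &Ctx) {
      // <4 x i32> resolves to the simple value type MVT::v4i32 ...
      Type *V4I32 = VectorType::get(Type::getInt32Ty(Ctx), 4);
      EVT VT = EVT::getEVT(V4I32);
      // ... whose name is "v4i32" and which maps back to the same IR type.
      (void)VT.getEVTString();     // "v4i32"
      (void)VT.getTypeForEVT(Ctx);
    }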
diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
index 13f7e83f3dd0..0ead2b8340ab 100644
--- a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -35,6 +35,7 @@
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/Pass.h"
#include "llvm/Support/Compiler.h"
@@ -241,10 +242,9 @@ bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) {
Indexes = &getAnalysis<SlotIndexes>();
LIS = &getAnalysis<LiveIntervals>();
VRM = &getAnalysis<VirtRegMap>();
- DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n"
- << "********** Function: "
- << MF->getName() << '\n');
- DEBUG(VRM->dump());
+ LLVM_DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n"
+ << "********** Function: " << MF->getName() << '\n');
+ LLVM_DEBUG(VRM->dump());
// Add kill flags while we still have virtual registers.
LIS->addKillFlags(VRM);
@@ -376,7 +376,7 @@ bool VirtRegRewriter::readsUndefSubreg(const MachineOperand &MO) const {
void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) const {
if (!MI.isIdentityCopy())
return;
- DEBUG(dbgs() << "Identity copy: " << MI);
+ LLVM_DEBUG(dbgs() << "Identity copy: " << MI);
++NumIdCopies;
// Copies like:
@@ -387,14 +387,14 @@ void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) const {
// instruction to maintain this information.
if (MI.getOperand(0).isUndef() || MI.getNumOperands() > 2) {
MI.setDesc(TII->get(TargetOpcode::KILL));
- DEBUG(dbgs() << " replace by: " << MI);
+ LLVM_DEBUG(dbgs() << " replace by: " << MI);
return;
}
if (Indexes)
Indexes->removeSingleMachineInstrFromMaps(MI);
MI.eraseFromBundle();
- DEBUG(dbgs() << " deleted.\n");
+ LLVM_DEBUG(dbgs() << " deleted.\n");
}
/// The liverange splitting logic sometimes produces bundles of copies when
@@ -406,6 +406,8 @@ void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const {
return;
if (MI.isBundledWithPred() && !MI.isBundledWithSucc()) {
+ SmallVector<MachineInstr *, 2> MIs({&MI});
+
// Only do this when the complete bundle is made out of COPYs.
MachineBasicBlock &MBB = *MI.getParent();
for (MachineBasicBlock::reverse_instr_iterator I =
@@ -413,16 +415,53 @@ void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const {
I != E && I->isBundledWithSucc(); ++I) {
if (!I->isCopy())
return;
+ MIs.push_back(&*I);
+ }
+ MachineInstr *FirstMI = MIs.back();
+
+ auto anyRegsAlias = [](const MachineInstr *Dst,
+ ArrayRef<MachineInstr *> Srcs,
+ const TargetRegisterInfo *TRI) {
+ for (const MachineInstr *Src : Srcs)
+ if (Src != Dst)
+ if (TRI->regsOverlap(Dst->getOperand(0).getReg(),
+ Src->getOperand(1).getReg()))
+ return true;
+ return false;
+ };
+
+ // If any of the destination registers in the bundle of copies alias any of
+ // the source registers, try to schedule the instructions to avoid any
+ // clobbering.
+ for (int E = MIs.size(), PrevE = E; E > 1; PrevE = E) {
+ for (int I = E; I--; )
+ if (!anyRegsAlias(MIs[I], makeArrayRef(MIs).take_front(E), TRI)) {
+ if (I + 1 != E)
+ std::swap(MIs[I], MIs[E - 1]);
+ --E;
+ }
+ if (PrevE == E) {
+ MF->getFunction().getContext().emitError(
+ "register rewriting failed: cycle in copy bundle");
+ break;
+ }
}
- for (MachineBasicBlock::reverse_instr_iterator I = MI.getReverseIterator();
- I->isBundledWithPred(); ) {
- MachineInstr &MI = *I;
- ++I;
+ MachineInstr *BundleStart = FirstMI;
+ for (MachineInstr *BundledMI : llvm::reverse(MIs)) {
+      // If the instruction is in the middle of the bundle, move it before the
+      // bundle starts; otherwise, just unbundle it. When we get to the last
+ // instruction, the bundle will have been completely undone.
+ if (BundledMI != BundleStart) {
+ BundledMI->removeFromBundle();
+ MBB.insert(FirstMI, BundledMI);
+ } else if (BundledMI->isBundledWithSucc()) {
+ BundledMI->unbundleFromSucc();
+ BundleStart = &*std::next(BundledMI->getIterator());
+ }
- MI.unbundleFromPred();
- if (Indexes)
- Indexes->insertMachineInstrInMaps(MI);
+ if (Indexes && BundledMI != FirstMI)
+ Indexes->insertMachineInstrInMaps(*BundledMI);
}
}
}
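The expandCopyBundle change above orders the copies of a bundle so that no copy overwrites a register another copy still needs to read, and emits an error when the copies form a cycle. The following self-contained sketch mirrors that selection loop with plain integers standing in for registers and equality standing in for regsOverlap(); all names are hypothetical and it is not part of the patch.

    #include <utility>
    #include <vector>

    using Copy = std::pair<int, int>; // {DstReg, SrcReg}

    // Repeatedly move to the tail any copy whose destination is not read by a
    // still-unordered copy; such a copy can safely execute before the rest.
    // If a full pass makes no progress, the copies form a cycle.
    bool orderCopies(std::vector<Copy> &Copies) {
      for (int E = static_cast<int>(Copies.size()), PrevE = E; E > 1;
           PrevE = E) {
        for (int I = E; I--;) {
          bool Clobbers = false;
          for (int J = 0; J < E; ++J)
            if (J != I && Copies[I].first == Copies[J].second)
              Clobbers = true; // Dst of copy I is still read by copy J
          if (!Clobbers) {
            if (I + 1 != E)
              std::swap(Copies[I], Copies[E - 1]);
            --E; // copy I may run before the E copies that remain
          }
        }
        if (PrevE == E)
          return false; // cycle in the copy bundle
      }
      return true; // the tail of Copies now holds the copies that run first
    }

Copies that end up at the tail are emitted first, which lines up with how the hunk above walks llvm::reverse(MIs) when moving the bundled copies in front of FirstMI.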
@@ -461,7 +500,7 @@ void VirtRegRewriter::rewrite() {
for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
MBBI != MBBE; ++MBBI) {
- DEBUG(MBBI->print(dbgs(), Indexes));
+ LLVM_DEBUG(MBBI->print(dbgs(), Indexes));
for (MachineBasicBlock::instr_iterator
MII = MBBI->instr_begin(), MIE = MBBI->instr_end(); MII != MIE;) {
MachineInstr *MI = &*MII;
@@ -530,7 +569,7 @@ void VirtRegRewriter::rewrite() {
// Rewrite. Note we could have used MachineOperand::substPhysReg(), but
// we need the inlining here.
MO.setReg(PhysReg);
- MO.setIsRenamableIfNoExtraRegAllocReq();
+ MO.setIsRenamable(true);
}
// Add any missing super-register kills after rewriting the whole
@@ -544,7 +583,7 @@ void VirtRegRewriter::rewrite() {
while (!SuperDefs.empty())
MI->addRegisterDefined(SuperDefs.pop_back_val(), TRI);
- DEBUG(dbgs() << "> " << *MI);
+ LLVM_DEBUG(dbgs() << "> " << *MI);
expandCopyBundle(*MI);
diff --git a/contrib/llvm/lib/CodeGen/WasmEHPrepare.cpp b/contrib/llvm/lib/CodeGen/WasmEHPrepare.cpp
new file mode 100644
index 000000000000..83d04da5dd0c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/WasmEHPrepare.cpp
@@ -0,0 +1,374 @@
+//===-- WasmEHPrepare - Prepare exception handling for WebAssembly -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation is designed for use by code generators which use the
+// WebAssembly exception handling scheme.
+//
+// WebAssembly exception handling uses Windows exception IR for the middle level
+// representation. This pass does the following transformation for every
+// catchpad block:
+// (In C-style pseudocode)
+//
+// - Before:
+// catchpad ...
+// exn = wasm.get.exception();
+// selector = wasm.get.selector();
+// ...
+//
+// - After:
+// catchpad ...
+// exn = wasm.catch(0); // 0 is a tag for C++
+// wasm.landingpad.index(index);
+// // Only add below in case it's not a single catch (...)
+// __wasm_lpad_context.lpad_index = index;
+// __wasm_lpad_context.lsda = wasm.lsda();
+// _Unwind_CallPersonality(exn);
+//   int selector = __wasm_lpad_context.selector;
+// ...
+//
+// Also, it does the following for a cleanuppad block with a call to
+// __clang_call_terminate():
+// - Before:
+// cleanuppad ...
+// exn = wasm.get.exception();
+// __clang_call_terminate(exn);
+//
+// - After:
+// cleanuppad ...
+// exn = wasm.catch(0); // 0 is a tag for C++
+// __clang_call_terminate(exn);
+//
+//
+// * Background: WebAssembly EH instructions
+// WebAssembly's try and catch instructions are structured as follows:
+// try
+// instruction*
+// catch (C++ tag)
+// instruction*
+// ...
+// catch_all
+// instruction*
+// try_end
+//
+// A catch instruction in WebAssembly does not correspond to a C++ catch clause.
+// In WebAssembly, there is a single catch instruction for all C++ exceptions.
+// There can be more catch instructions for exceptions in other languages, but
+// they are not generated for now. catch_all catches all exceptions including
+// foreign exceptions (e.g. JavaScript). We turn catchpads into catch (C++ tag)
+// and cleanuppads into catch_all, with one exception: cleanuppad with a call to
+// __clang_call_terminate should be both in catch (C++ tag) and catch_all.
+//
+//
+// * Background: Direct personality function call
+// In WebAssembly EH, the VM is responsible for unwinding the stack once an
+// exception is thrown. After the stack is unwound, the control flow is
+// transfered to WebAssembly 'catch' instruction, which returns a caught
+// exception object.
+//
+// Unwinding the stack is not done by libunwind but by the VM, so the
+// personality function in libcxxabi cannot be called from libunwind during the
+// unwinding process. So after a catch instruction, we insert a call to a
+// wrapper function in libunwind that in turn calls the real personality
+// function.
+//
+// In Itanium EH, if the personality function decides there is no matching catch
+// clause in a call frame and no cleanup action to perform, the unwinder doesn't
+// stop there and continues unwinding. But in Wasm EH, the unwinder stops at
+// every call frame with a catch instruction, after which the personality
+// function is called from the compiler-generated user code here.
+//
+// In libunwind, we have this struct that serves as a communication channel
+// between the compiler-generated user code and the personality function in
+// libcxxabi.
+//
+// struct _Unwind_LandingPadContext {
+// uintptr_t lpad_index;
+// uintptr_t lsda;
+// uintptr_t selector;
+// };
+// struct _Unwind_LandingPadContext __wasm_lpad_context = ...;
+//
+// And this wrapper in libunwind calls the personality function.
+//
+// _Unwind_Reason_Code _Unwind_CallPersonality(void *exception_ptr) {
+// struct _Unwind_Exception *exception_obj =
+// (struct _Unwind_Exception *)exception_ptr;
+// _Unwind_Reason_Code ret = __gxx_personality_v0(
+// 1, _UA_CLEANUP_PHASE, exception_obj->exception_class, exception_obj,
+// (struct _Unwind_Context *)__wasm_lpad_context);
+// return ret;
+// }
+//
+// We pass a landing pad index and the address of the LSDA for the current
+// function to the wrapper function _Unwind_CallPersonality in libunwind, and
+// we retrieve the selector after it returns.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/WasmEHFuncInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "wasmehprepare"
+
+namespace {
+class WasmEHPrepare : public FunctionPass {
+ Type *LPadContextTy = nullptr; // type of 'struct _Unwind_LandingPadContext'
+ GlobalVariable *LPadContextGV = nullptr; // __wasm_lpad_context
+
+ // Field addresses of struct _Unwind_LandingPadContext
+ Value *LPadIndexField = nullptr; // lpad_index field
+ Value *LSDAField = nullptr; // lsda field
+ Value *SelectorField = nullptr; // selector
+
+ Function *CatchF = nullptr; // wasm.catch.extract() intrinsic
+ Function *LPadIndexF = nullptr; // wasm.landingpad.index() intrinsic
+ Function *LSDAF = nullptr; // wasm.lsda() intrinsic
+ Function *GetExnF = nullptr; // wasm.get.exception() intrinsic
+ Function *GetSelectorF = nullptr; // wasm.get.ehselector() intrinsic
+ Function *CallPersonalityF = nullptr; // _Unwind_CallPersonality() wrapper
+ Function *ClangCallTermF = nullptr; // __clang_call_terminate() function
+
+ void prepareEHPad(BasicBlock *BB, unsigned Index);
+ void prepareTerminateCleanupPad(BasicBlock *BB);
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ WasmEHPrepare() : FunctionPass(ID) {}
+
+ bool doInitialization(Module &M) override;
+ bool runOnFunction(Function &F) override;
+
+ StringRef getPassName() const override {
+ return "WebAssembly Exception handling preparation";
+ }
+};
+} // end anonymous namespace
+
+char WasmEHPrepare::ID = 0;
+INITIALIZE_PASS(WasmEHPrepare, DEBUG_TYPE, "Prepare WebAssembly exceptions",
+ false, false)
+
+FunctionPass *llvm::createWasmEHPass() { return new WasmEHPrepare(); }
+
+bool WasmEHPrepare::doInitialization(Module &M) {
+ IRBuilder<> IRB(M.getContext());
+ LPadContextTy = StructType::get(IRB.getInt32Ty(), // lpad_index
+ IRB.getInt8PtrTy(), // lsda
+ IRB.getInt32Ty() // selector
+ );
+ return false;
+}
+
+bool WasmEHPrepare::runOnFunction(Function &F) {
+ SmallVector<BasicBlock *, 16> CatchPads;
+ SmallVector<BasicBlock *, 16> CleanupPads;
+ for (BasicBlock &BB : F) {
+ if (!BB.isEHPad())
+ continue;
+ auto *Pad = BB.getFirstNonPHI();
+ if (isa<CatchPadInst>(Pad))
+ CatchPads.push_back(&BB);
+ else if (isa<CleanupPadInst>(Pad))
+ CleanupPads.push_back(&BB);
+ }
+
+ if (CatchPads.empty() && CleanupPads.empty())
+ return false;
+ assert(F.hasPersonalityFn() && "Personality function not found");
+
+ Module &M = *F.getParent();
+ IRBuilder<> IRB(F.getContext());
+
+ // __wasm_lpad_context global variable
+ LPadContextGV = cast<GlobalVariable>(
+ M.getOrInsertGlobal("__wasm_lpad_context", LPadContextTy));
+ LPadIndexField = IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 0,
+ "lpad_index_gep");
+ LSDAField =
+ IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 1, "lsda_gep");
+ SelectorField = IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 2,
+ "selector_gep");
+
+  // wasm.catch() intrinsic, which will be lowered to wasm 'catch' instruction.
+ CatchF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_catch);
+  // wasm.landingpad.index() intrinsic, which specifies the landing pad index.
+ LPadIndexF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_landingpad_index);
+  // wasm.lsda() intrinsic. Returns the address of the LSDA table for the
+  // current function.
+ LSDAF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_lsda);
+ // wasm.get.exception() and wasm.get.ehselector() intrinsics. Calls to these
+ // are generated in clang.
+ GetExnF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_get_exception);
+ GetSelectorF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_get_ehselector);
+
+  // _Unwind_CallPersonality() wrapper, which calls the personality function.
+ CallPersonalityF = cast<Function>(M.getOrInsertFunction(
+ "_Unwind_CallPersonality", IRB.getInt32Ty(), IRB.getInt8PtrTy()));
+ CallPersonalityF->setDoesNotThrow();
+
+ // __clang_call_terminate() function, which is inserted by clang in case a
+ // cleanup throws
+ ClangCallTermF = M.getFunction("__clang_call_terminate");
+
+ unsigned Index = 0;
+ for (auto *BB : CatchPads) {
+ auto *CPI = cast<CatchPadInst>(BB->getFirstNonPHI());
+ // In case of a single catch (...), we don't need to emit LSDA
+ if (CPI->getNumArgOperands() == 1 &&
+ cast<Constant>(CPI->getArgOperand(0))->isNullValue())
+ prepareEHPad(BB, -1);
+ else
+ prepareEHPad(BB, Index++);
+ }
+
+ if (!ClangCallTermF)
+ return !CatchPads.empty();
+
+  // Cleanuppads will turn into catch_all later, but a cleanuppad with a call
+  // to __clang_call_terminate() is a special case. __clang_call_terminate()
+  // takes an exception object, so we have to duplicate the call in both
+  // 'catch <C++ tag>' and 'catch_all' clauses. Here we only insert a call to
+  // catch; the duplication will be done later. In catch_all, the exception
+  // object will be set to null.
+ for (auto *BB : CleanupPads)
+ for (auto &I : *BB)
+ if (auto *CI = dyn_cast<CallInst>(&I))
+ if (CI->getCalledValue() == ClangCallTermF)
+ prepareEHPad(BB, -1);
+
+ return true;
+}
+
+void WasmEHPrepare::prepareEHPad(BasicBlock *BB, unsigned Index) {
+ assert(BB->isEHPad() && "BB is not an EHPad!");
+ IRBuilder<> IRB(BB->getContext());
+
+ IRB.SetInsertPoint(&*BB->getFirstInsertionPt());
+ // The argument to wasm.catch() is the tag for C++ exceptions, which we set to
+ // 0 for this module.
+ // Pseudocode: void *exn = wasm.catch(0);
+ Instruction *Exn = IRB.CreateCall(CatchF, IRB.getInt32(0), "exn");
+ // Replace the return value of wasm.get.exception() with the return value from
+ // wasm.catch().
+ auto *FPI = cast<FuncletPadInst>(BB->getFirstNonPHI());
+ Instruction *GetExnCI = nullptr, *GetSelectorCI = nullptr;
+ for (auto &U : FPI->uses()) {
+ if (auto *CI = dyn_cast<CallInst>(U.getUser())) {
+ if (CI->getCalledValue() == GetExnF)
+ GetExnCI = CI;
+ else if (CI->getCalledValue() == GetSelectorF)
+ GetSelectorCI = CI;
+ }
+ }
+
+ assert(GetExnCI && "wasm.get.exception() call does not exist");
+ GetExnCI->replaceAllUsesWith(Exn);
+ GetExnCI->eraseFromParent();
+
+  // In case it is a catchpad with a single catch (...) or a cleanuppad, we
+  // don't need to call the personality function because we don't need a
+  // selector.
+ if (FPI->getNumArgOperands() == 0 ||
+ (FPI->getNumArgOperands() == 1 &&
+ cast<Constant>(FPI->getArgOperand(0))->isNullValue())) {
+ if (GetSelectorCI) {
+ assert(GetSelectorCI->use_empty() &&
+ "wasm.get.ehselector() still has uses!");
+ GetSelectorCI->eraseFromParent();
+ }
+ return;
+ }
+ IRB.SetInsertPoint(Exn->getNextNode());
+
+ // This is to create a map of <landingpad EH label, landingpad index> in
+ // SelectionDAGISel, which is to be used in EHStreamer to emit LSDA tables.
+ // Pseudocode: wasm.landingpad.index(Index);
+ IRB.CreateCall(LPadIndexF, IRB.getInt32(Index));
+
+ // Pseudocode: __wasm_lpad_context.lpad_index = index;
+ IRB.CreateStore(IRB.getInt32(Index), LPadIndexField);
+
+  // Store the LSDA address only if this catchpad belongs to a top-level
+  // catchswitch. If there is another catchpad that dominates this pad, we
+  // don't need to store the LSDA address again, because it is the same
+  // throughout the function and has already been stored.
+ // TODO Can we not store LSDA address in user function but make libcxxabi
+ // compute it?
+ auto *CPI = cast<CatchPadInst>(FPI);
+ if (isa<ConstantTokenNone>(CPI->getCatchSwitch()->getParentPad()))
+ // Pseudocode: __wasm_lpad_context.lsda = wasm.lsda();
+ IRB.CreateStore(IRB.CreateCall(LSDAF), LSDAField);
+
+ // Pseudocode: _Unwind_CallPersonality(exn);
+ CallInst *PersCI =
+ IRB.CreateCall(CallPersonalityF, Exn, OperandBundleDef("funclet", CPI));
+ PersCI->setDoesNotThrow();
+
+  // Pseudocode: int selector = __wasm_lpad_context.selector;
+ Instruction *Selector = IRB.CreateLoad(SelectorField, "selector");
+
+ // Replace the return value from wasm.get.ehselector() with the selector value
+ // loaded from __wasm_lpad_context.selector.
+ assert(GetSelectorCI && "wasm.get.ehselector() call does not exist");
+ GetSelectorCI->replaceAllUsesWith(Selector);
+ GetSelectorCI->eraseFromParent();
+}
+
+void llvm::calculateWasmEHInfo(const Function *F, WasmEHFuncInfo &EHInfo) {
+ for (const auto &BB : *F) {
+ if (!BB.isEHPad())
+ continue;
+ const Instruction *Pad = BB.getFirstNonPHI();
+
+ // If an exception is not caught by a catchpad (i.e., it is a foreign
+ // exception), it will unwind to its parent catchswitch's unwind
+    // destination. We don't record an unwind destination for cleanuppads,
+    // because a cleanuppad (lowered to catch_all) catches every exception
+    // that reaches it.
+ if (const auto *CatchPad = dyn_cast<CatchPadInst>(Pad)) {
+ const auto *UnwindBB = CatchPad->getCatchSwitch()->getUnwindDest();
+ if (!UnwindBB)
+ continue;
+ const Instruction *UnwindPad = UnwindBB->getFirstNonPHI();
+ if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UnwindPad))
+        // Currently there should be only one handler per catchswitch.
+ EHInfo.setEHPadUnwindDest(&BB, *CatchSwitch->handlers().begin());
+ else // cleanuppad
+ EHInfo.setEHPadUnwindDest(&BB, UnwindBB);
+ }
+ }
+
+ // Record the unwind destination for invoke and cleanupret instructions.
+ for (const auto &BB : *F) {
+ const Instruction *TI = BB.getTerminator();
+ BasicBlock *UnwindBB = nullptr;
+ if (const auto *Invoke = dyn_cast<InvokeInst>(TI))
+ UnwindBB = Invoke->getUnwindDest();
+ else if (const auto *CleanupRet = dyn_cast<CleanupReturnInst>(TI))
+ UnwindBB = CleanupRet->getUnwindDest();
+ if (!UnwindBB)
+ continue;
+ const Instruction *UnwindPad = UnwindBB->getFirstNonPHI();
+ if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UnwindPad))
+      // Currently there should be only one handler per catchswitch.
+ EHInfo.setThrowUnwindDest(&BB, *CatchSwitch->handlers().begin());
+ else // cleanuppad
+ EHInfo.setThrowUnwindDest(&BB, UnwindBB);
+ }
+}
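As a hedged, source-level illustration of the input this new pass rewrites (assuming a C++ front end that emits the catchpad-based IR described in the header comment; none of this is part of the patch), the typed catch clause below produces a catchpad that needs a selector, so the pass inserts wasm.catch(0), a landing pad index, the __wasm_lpad_context stores, and the call to _Unwind_CallPersonality.

    // Illustration only.
    extern void mayThrow(); // assumed to be defined elsewhere and able to throw

    int handleInt() {
      try {
        mayThrow();
      } catch (int E) { // a typed catch, not catch (...), so a selector is needed
        return E;
      }
      return 0;
    }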
diff --git a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
index 0b16a113640d..e629c13f133f 100644
--- a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
@@ -21,6 +21,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
@@ -31,7 +32,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
@@ -41,7 +41,7 @@ using namespace llvm;
static cl::opt<bool> DisableDemotion(
"disable-demotion", cl::Hidden,
cl::desc(
- "Clone multicolor basic blocks but do not demote cross funclet values"),
+ "Clone multicolor basic blocks but do not demote cross scopes"),
cl::init(false));
static cl::opt<bool> DisableCleanups(
@@ -49,12 +49,17 @@ static cl::opt<bool> DisableCleanups(
cl::desc("Do not remove implausible terminators or other similar cleanups"),
cl::init(false));
+static cl::opt<bool> DemoteCatchSwitchPHIOnlyOpt(
+ "demote-catchswitch-only", cl::Hidden,
+ cl::desc("Demote catchswitch BBs only (for wasm EH)"), cl::init(false));
+
namespace {
class WinEHPrepare : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid.
- WinEHPrepare() : FunctionPass(ID) {}
+ WinEHPrepare(bool DemoteCatchSwitchPHIOnly = false)
+ : FunctionPass(ID), DemoteCatchSwitchPHIOnly(DemoteCatchSwitchPHIOnly) {}
bool runOnFunction(Function &Fn) override;
@@ -77,12 +82,14 @@ private:
bool prepareExplicitEH(Function &F);
void colorFunclets(Function &F);
- void demotePHIsOnFunclets(Function &F);
+ void demotePHIsOnFunclets(Function &F, bool DemoteCatchSwitchPHIOnly);
void cloneCommonBlocks(Function &F);
void removeImplausibleInstructions(Function &F);
void cleanupPreparedFunclets(Function &F);
void verifyPreparedFunclets(Function &F);
+ bool DemoteCatchSwitchPHIOnly;
+
// All fields are reset by runOnFunction.
EHPersonality Personality = EHPersonality::Unknown;
@@ -97,7 +104,9 @@ char WinEHPrepare::ID = 0;
INITIALIZE_PASS(WinEHPrepare, DEBUG_TYPE, "Prepare Windows exceptions",
false, false)
-FunctionPass *llvm::createWinEHPass() { return new WinEHPrepare(); }
+FunctionPass *llvm::createWinEHPass(bool DemoteCatchSwitchPHIOnly) {
+ return new WinEHPrepare(DemoteCatchSwitchPHIOnly);
+}
bool WinEHPrepare::runOnFunction(Function &Fn) {
if (!Fn.hasPersonalityFn())
@@ -106,8 +115,8 @@ bool WinEHPrepare::runOnFunction(Function &Fn) {
// Classify the personality to see what kind of preparation we need.
Personality = classifyEHPersonality(Fn.getPersonalityFn());
- // Do nothing if this is not a funclet-based personality.
- if (!isFuncletEHPersonality(Personality))
+ // Do nothing if this is not a scope-based personality.
+ if (!isScopedEHPersonality(Personality))
return false;
DL = &Fn.getParent()->getDataLayout();
@@ -271,10 +280,11 @@ static void calculateCXXStateNumbers(WinEHFuncInfo &FuncInfo,
}
int CatchHigh = FuncInfo.getLastStateNumber();
addTryBlockMapEntry(FuncInfo, TryLow, TryHigh, CatchHigh, Handlers);
- DEBUG(dbgs() << "TryLow[" << BB->getName() << "]: " << TryLow << '\n');
- DEBUG(dbgs() << "TryHigh[" << BB->getName() << "]: " << TryHigh << '\n');
- DEBUG(dbgs() << "CatchHigh[" << BB->getName() << "]: " << CatchHigh
- << '\n');
+ LLVM_DEBUG(dbgs() << "TryLow[" << BB->getName() << "]: " << TryLow << '\n');
+ LLVM_DEBUG(dbgs() << "TryHigh[" << BB->getName() << "]: " << TryHigh
+ << '\n');
+ LLVM_DEBUG(dbgs() << "CatchHigh[" << BB->getName() << "]: " << CatchHigh
+ << '\n');
} else {
auto *CleanupPad = cast<CleanupPadInst>(FirstNonPHI);
@@ -285,8 +295,8 @@ static void calculateCXXStateNumbers(WinEHFuncInfo &FuncInfo,
int CleanupState = addUnwindMapEntry(FuncInfo, ParentState, BB);
FuncInfo.EHPadStateMap[CleanupPad] = CleanupState;
- DEBUG(dbgs() << "Assigning state #" << CleanupState << " to BB "
- << BB->getName() << '\n');
+ LLVM_DEBUG(dbgs() << "Assigning state #" << CleanupState << " to BB "
+ << BB->getName() << '\n');
for (const BasicBlock *PredBlock : predecessors(BB)) {
if ((PredBlock = getEHPadFromPredecessor(PredBlock,
CleanupPad->getParentPad()))) {
@@ -351,8 +361,8 @@ static void calculateSEHStateNumbers(WinEHFuncInfo &FuncInfo,
// Everything in the __try block uses TryState as its parent state.
FuncInfo.EHPadStateMap[CatchSwitch] = TryState;
- DEBUG(dbgs() << "Assigning state #" << TryState << " to BB "
- << CatchPadBB->getName() << '\n');
+ LLVM_DEBUG(dbgs() << "Assigning state #" << TryState << " to BB "
+ << CatchPadBB->getName() << '\n');
for (const BasicBlock *PredBlock : predecessors(BB))
if ((PredBlock = getEHPadFromPredecessor(PredBlock,
CatchSwitch->getParentPad())))
@@ -387,8 +397,8 @@ static void calculateSEHStateNumbers(WinEHFuncInfo &FuncInfo,
int CleanupState = addSEHFinally(FuncInfo, ParentState, BB);
FuncInfo.EHPadStateMap[CleanupPad] = CleanupState;
- DEBUG(dbgs() << "Assigning state #" << CleanupState << " to BB "
- << BB->getName() << '\n');
+ LLVM_DEBUG(dbgs() << "Assigning state #" << CleanupState << " to BB "
+ << BB->getName() << '\n');
for (const BasicBlock *PredBlock : predecessors(BB))
if ((PredBlock =
getEHPadFromPredecessor(PredBlock, CleanupPad->getParentPad())))
@@ -677,13 +687,17 @@ void WinEHPrepare::colorFunclets(Function &F) {
}
}
-void WinEHPrepare::demotePHIsOnFunclets(Function &F) {
+void WinEHPrepare::demotePHIsOnFunclets(Function &F,
+ bool DemoteCatchSwitchPHIOnly) {
// Strip PHI nodes off of EH pads.
SmallVector<PHINode *, 16> PHINodes;
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
BasicBlock *BB = &*FI++;
if (!BB->isEHPad())
continue;
+ if (DemoteCatchSwitchPHIOnly && !isa<CatchSwitchInst>(BB->getFirstNonPHI()))
+ continue;
+
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
Instruction *I = &*BI++;
auto *PN = dyn_cast<PHINode>(I);
@@ -1031,20 +1045,21 @@ bool WinEHPrepare::prepareExplicitEH(Function &F) {
cloneCommonBlocks(F);
if (!DisableDemotion)
- demotePHIsOnFunclets(F);
+ demotePHIsOnFunclets(F, DemoteCatchSwitchPHIOnly ||
+ DemoteCatchSwitchPHIOnlyOpt);
if (!DisableCleanups) {
- DEBUG(verifyFunction(F));
+ LLVM_DEBUG(verifyFunction(F));
removeImplausibleInstructions(F);
- DEBUG(verifyFunction(F));
+ LLVM_DEBUG(verifyFunction(F));
cleanupPreparedFunclets(F);
}
- DEBUG(verifyPreparedFunclets(F));
+ LLVM_DEBUG(verifyPreparedFunclets(F));
// Recolor the CFG to verify that all is well.
- DEBUG(colorFunclets(F));
- DEBUG(verifyPreparedFunclets(F));
+ LLVM_DEBUG(colorFunclets(F));
+ LLVM_DEBUG(verifyPreparedFunclets(F));
BlockColors.clear();
FuncletBlocks.clear();
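WinEHPrepare now takes a DemoteCatchSwitchPHIOnly flag so the same pass can serve the wasm EH pipeline, which only needs PHIs on catchswitch blocks demoted. Below is a minimal sketch of a call site under the legacy pass manager; the helper function and its parameter are assumptions, not part of the patch.

    #include "llvm/CodeGen/Passes.h"
    #include "llvm/IR/LegacyPassManager.h"
    using namespace llvm;

    // Hypothetical helper: choose the demotion mode from the EH scheme in use.
    void addEHPreparePass(legacy::PassManagerBase &PM, bool UsesWasmEH) {
      // Wasm EH only needs catchswitch PHIs demoted; Windows EH keeps the
      // original behaviour since the flag defaults to false.
      PM.add(createWinEHPass(/*DemoteCatchSwitchPHIOnly=*/UsesWasmEH));
    }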
diff --git a/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp b/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp
index 3d83afcf1fc5..32a7457c2060 100644
--- a/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp
+++ b/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp
@@ -52,7 +52,6 @@ struct XRayInstrumentation : public MachineFunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
AU.addPreserved<MachineDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -160,11 +159,26 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
for (const auto &MBB : MF)
MICount += MBB.size();
+ // Get MachineDominatorTree or compute it on the fly if it's unavailable
+ auto *MDT = getAnalysisIfAvailable<MachineDominatorTree>();
+ MachineDominatorTree ComputedMDT;
+ if (!MDT) {
+ ComputedMDT.getBase().recalculate(MF);
+ MDT = &ComputedMDT;
+ }
+
+ // Get MachineLoopInfo or compute it on the fly if it's unavailable
+ auto *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
+ MachineLoopInfo ComputedMLI;
+ if (!MLI) {
+ ComputedMLI.getBase().analyze(MDT->getBase());
+ MLI = &ComputedMLI;
+ }
+
// Check if we have a loop.
// FIXME: Maybe make this smarter, and see whether the loops are dependent
// on inputs or side-effects?
- MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
- if (MLI.empty() && MICount < XRayThreshold)
+ if (MLI->empty() && MICount < XRayThreshold)
return false; // Function is too small and has no loops.
}